最近,我想使用矢量数据库存储OpenAI嵌入向量。
我已经研究了各种矢量数据库,例如Pinecone,Redis,Qdrant,Milvus,Weaviate,Chroma,Pgvector。比较它们之后,我选择了Weaviate作为矢量数据库。Weaviate是用Golang开发的,这使我更容易安装和测试。
如何在Debian 12上安装Weaviate
首先,下载Weaviate
转到github.com/weaviate/weaviate,然后找到适合您操作系统的版本。
我选择下载weaviate-v1.20.3-linux-amd64.tar.gz
mkdir vectordb
cd vectordb
wget https://github.com/weaviate/weaviate/releases/download/v1.20.3/weaviate-v1.20.3-linux-amd64.tar.gz
然后安装Weaviate
首先解压缩文件,然后添加配置文件。
tar -zxvf weaviate-v1.20.3-linux-amd64.tar.gz
您需要通过下面的环境变量配置身份验证和数据持久化。这里我们使用API密钥身份验证进行测试。
export AUTHENTICATION_APIKEY_ENABLED=true
export AUTHENTICATION_APIKEY_ALLOWED_KEYS=q8Z5HN4U2w8jUDRGeGxyKkqvwjPg7w4P
export AUTHENTICATION_APIKEY_USERS=newbing@example.com
export PERSISTENCE_DATA_PATH=/home/newbing/vectordb/data
启动Weaviate
解压缩文件并设置环境变量后,您可以启动Weaviate实例。
./weaviate --host=127.0.0.1 --port=8181 --scheme=http
使用Supervisor启动
如果您不想手动启动或停止,则可以使用Supervisor来管理Weaviate实例。
Supervisor配置文件:
; Supervisor program section that starts the Weaviate binary and restarts it when it exits.
[program:weaviate]
directory=/home/newbing/vectordb/
command=/home/newbing/vectordb/weaviate --host=127.0.0.1 --port=8181 --scheme=http
autostart=true
autorestart=true
; NOTE(review): the process runs as "www" while the binary and data path live under /home/newbing;
; confirm that "www" can execute the binary and write to PERSISTENCE_DATA_PATH.
user=www
; The API key must be the same value as AUTHENTICATION_APIKEY_ALLOWED_KEYS in the shell example,
; otherwise clients configured with that key are rejected.
environment=HOME="/home/www",AUTHENTICATION_APIKEY_ENABLED="true",AUTHENTICATION_APIKEY_ALLOWED_KEYS="q8Z5HN4U2w8jUDRGeGxyKkqvwjPg7w4P",AUTHENTICATION_APIKEY_USERS="newbing@example.com",PERSISTENCE_DATA_PATH="/home/newbing/vectordb/data"
numprocs=1
redirect_stderr=true
stdout_logfile=/var/log/supervisor/weaviate.log
在Golang中使用Weaviate
创建Weaviate客户端
package vector
import (
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
"github.com/weaviate/weaviate-go-client/v4/weaviate"
"github.com/weaviate/weaviate-go-client/v4/weaviate/auth"
"net/url"
)
var (
_cli *weaviate.Client
)
// GetClient returns the package-level Weaviate client, creating it on the
// first call.
//
// The endpoint is read from the viper key "weaviate.api" and parsed as a URL
// whose host and scheme are handed to the client config; the API key is read
// from "weaviate.key". Once a client has been created successfully it is
// cached in _cli and returned by every later call.
//
// A URL parse failure or a client construction failure is logged and returned
// together with a nil client.
func GetClient() (*weaviate.Client, error) {
	if cached := _cli; cached != nil {
		return cached, nil
	}

	endpoint, parseErr := url.Parse(viper.GetString("weaviate.api"))
	if parseErr != nil {
		log.Error().Err(parseErr).
			Str("func", "init").
			Str("package", "vector").
			Msg("parse api addr failed")
		return nil, parseErr
	}

	created, newErr := weaviate.NewClient(weaviate.Config{
		Host:       endpoint.Host,
		Scheme:     endpoint.Scheme,
		AuthConfig: auth.ApiKey{Value: viper.GetString("weaviate.key")},
	})
	if newErr != nil {
		log.Error().Err(newErr).
			Str("func", "init").
			Str("package", "vector").
			Msg("create client failed")
		// Never cache a client that failed to construct.
		_cli = nil
		return nil, newErr
	}

	_cli = created
	return created, nil
}
将向量存储到Weaviate
// Create stores one object together with its vector in Weaviate.
//
// class is the Weaviate class (schema / table name) the object belongs to,
// props are the object's properties, and vector is the embedding to attach
// (for example one produced by OpenAI).
//
// It returns the Object field of the creator's result, or the error from
// GetClient or from the create call.
func Create(ctx context.Context, class string, props map[string]string, vector []float32) (*models.Object, error) {
	client, err := GetClient()
	if err != nil {
		return nil, err
	}

	creator := client.Data().Creator().
		WithClassName(class).
		WithProperties(props).
		WithVector(vector)

	wrapper, err := creator.Do(ctx)
	if err != nil {
		return nil, err
	}
	return wrapper.Object, nil
}
相似性搜索向量
// Near runs a nearVector similarity search against a Weaviate class.
//
// class is the Weaviate class (schema / table name) to query.
// fields lists the properties to return for every hit.
// vector is the query embedding (for example one produced by OpenAI).
// maxDistance is the largest distance a hit may have; smaller means more similar.
// limit caps the number of returned rows; 0 selects the default of 10.
//
// Besides the requested fields, every hit is asked for _additional.certainty
// and _additional.distance.
//
// It returns the raw entries found under Data["Get"][class]. When the response
// has no "Get" key an empty, non-nil slice is returned. An error is returned
// when the client cannot be obtained, the query fails, or the response does
// not have the expected shape.
func Near(ctx context.Context, class string, fields []string, vector []float32, maxDistance float32, limit int) ([]any, error) {
	if limit == 0 {
		limit = 10
	}
	cli, err := GetClient()
	if err != nil {
		return nil, err
	}

	// Length 0 with capacity len(fields)+1: a non-zero length would leave that
	// many zero-value (unnamed) fields in front of the ones appended below.
	gqlFields := make([]graphql.Field, 0, len(fields)+1)
	for _, field := range fields {
		gqlFields = append(gqlFields, graphql.Field{Name: field})
	}
	gqlFields = append(gqlFields, graphql.Field{
		Name: "_additional",
		Fields: []graphql.Field{
			{Name: "certainty"}, // only supported if distance==cosine
			{Name: "distance"},  // always supported
		},
	})

	nearVector := cli.GraphQL().NearVectorArgBuilder().
		WithVector(vector).
		WithDistance(maxDistance) // upper bound on the distance of a hit

	res, err := cli.GraphQL().Get().
		WithClassName(class).
		WithFields(gqlFields...).
		WithNearVector(nearVector).
		WithLimit(limit).
		Do(ctx)
	if err != nil {
		return nil, err
	}

	// Walk Data["Get"][class] step by step, failing on the first unexpected shape.
	getRes, ok := res.Data["Get"]
	if !ok {
		return []any{}, nil
	}
	getMap, ok := getRes.(map[string]any)
	if !ok {
		return nil, errors.New("no get data found")
	}
	list, ok := getMap[class]
	if !ok {
		return nil, errors.New("data not found")
	}
	rows, ok := list.([]any)
	if !ok {
		return nil, errors.New("data not array list")
	}
	return rows, nil
}
通过属性查找向量
// FindByAttribute looks up objects of a Weaviate class whose property equals
// a given string value.
//
// class is the Weaviate class (schema / table name) to query.
// fields lists the properties to return for every match.
// key is the property to compare and value is the string it must equal.
//
// Besides the requested fields, every match is asked for _additional.vector.
//
// It returns the raw entries found under Data["Get"][class]. When the response
// has no "Get" key an empty, non-nil slice is returned. An error (with a nil
// slice) is returned when the client cannot be obtained, the query fails, or
// the response does not have the expected shape.
func FindByAttribute(ctx context.Context, class string, fields []string, key, value string) ([]any, error) {
	cli, err := GetClient()
	if err != nil {
		return nil, err
	}

	// Length 0 with capacity len(fields)+1: a non-zero length would leave that
	// many zero-value (unnamed) fields in front of the ones appended below.
	gqlFields := make([]graphql.Field, 0, len(fields)+1)
	for _, field := range fields {
		gqlFields = append(gqlFields, graphql.Field{Name: field})
	}
	gqlFields = append(gqlFields, graphql.Field{
		Name: "_additional",
		Fields: []graphql.Field{
			{Name: "vector"}, // always supported
		},
	})

	where := filters.Where().
		WithPath([]string{key}).
		WithOperator(filters.Equal).
		WithValueString(value)

	res, err := cli.GraphQL().Get().
		WithClassName(class).
		WithFields(gqlFields...).
		WithWhere(where).
		Do(ctx)
	if err != nil {
		return nil, err
	}

	// Walk Data["Get"][class] step by step, failing on the first unexpected shape.
	getRes, ok := res.Data["Get"]
	if !ok {
		return []any{}, nil
	}
	getMap, ok := getRes.(map[string]any)
	if !ok {
		return nil, errors.New("no get data found")
	}
	list, ok := getMap[class]
	if !ok {
		return nil, errors.New("data not found")
	}
	rows, ok := list.([]any)
	if !ok {
		return nil, errors.New("data not array list")
	}
	return rows, nil
}
完整的Weaviate存储、搜索、查找代码
package vector
import (
"context"
"errors"
"github.com/weaviate/weaviate-go-client/v4/weaviate/filters"
"github.com/weaviate/weaviate-go-client/v4/weaviate/graphql"
"github.com/weaviate/weaviate/entities/models"
)
// Create stores one object together with its vector in Weaviate.
//
// class is the Weaviate class (schema / table name) the object belongs to,
// props are the object's properties, and vector is the embedding to attach
// (for example one produced by OpenAI).
//
// It returns the Object field of the creator's result, or the error from
// GetClient or from the create call.
func Create(ctx context.Context, class string, props map[string]string, vector []float32) (*models.Object, error) {
	client, err := GetClient()
	if err != nil {
		return nil, err
	}

	creator := client.Data().Creator().
		WithClassName(class).
		WithProperties(props).
		WithVector(vector)

	wrapper, err := creator.Do(ctx)
	if err != nil {
		return nil, err
	}
	return wrapper.Object, nil
}
// Near runs a nearVector similarity search against a Weaviate class.
//
// class is the Weaviate class (schema / table name) to query.
// fields lists the properties to return for every hit.
// vector is the query embedding (for example one produced by OpenAI).
// maxDistance is the largest distance a hit may have; smaller means more similar.
// limit caps the number of returned rows; 0 selects the default of 10.
//
// Besides the requested fields, every hit is asked for _additional.certainty
// and _additional.distance.
//
// It returns the raw entries found under Data["Get"][class]. When the response
// has no "Get" key an empty, non-nil slice is returned. An error is returned
// when the client cannot be obtained, the query fails, or the response does
// not have the expected shape.
func Near(ctx context.Context, class string, fields []string, vector []float32, maxDistance float32, limit int) ([]any, error) {
	if limit == 0 {
		limit = 10
	}
	cli, err := GetClient()
	if err != nil {
		return nil, err
	}

	// Length 0 with capacity len(fields)+1: a non-zero length would leave that
	// many zero-value (unnamed) fields in front of the ones appended below.
	gqlFields := make([]graphql.Field, 0, len(fields)+1)
	for _, field := range fields {
		gqlFields = append(gqlFields, graphql.Field{Name: field})
	}
	gqlFields = append(gqlFields, graphql.Field{
		Name: "_additional",
		Fields: []graphql.Field{
			{Name: "certainty"}, // only supported if distance==cosine
			{Name: "distance"},  // always supported
		},
	})

	nearVector := cli.GraphQL().NearVectorArgBuilder().
		WithVector(vector).
		WithDistance(maxDistance) // upper bound on the distance of a hit

	res, err := cli.GraphQL().Get().
		WithClassName(class).
		WithFields(gqlFields...).
		WithNearVector(nearVector).
		WithLimit(limit).
		Do(ctx)
	if err != nil {
		return nil, err
	}

	// Walk Data["Get"][class] step by step, failing on the first unexpected shape.
	getRes, ok := res.Data["Get"]
	if !ok {
		return []any{}, nil
	}
	getMap, ok := getRes.(map[string]any)
	if !ok {
		return nil, errors.New("no get data found")
	}
	list, ok := getMap[class]
	if !ok {
		return nil, errors.New("data not found")
	}
	rows, ok := list.([]any)
	if !ok {
		return nil, errors.New("data not array list")
	}
	return rows, nil
}
// FindByAttribute looks up objects of a Weaviate class whose property equals
// a given string value.
//
// class is the Weaviate class (schema / table name) to query.
// fields lists the properties to return for every match.
// key is the property to compare and value is the string it must equal.
//
// Besides the requested fields, every match is asked for _additional.vector.
//
// It returns the raw entries found under Data["Get"][class]. When the response
// has no "Get" key an empty, non-nil slice is returned. An error (with a nil
// slice) is returned when the client cannot be obtained, the query fails, or
// the response does not have the expected shape.
func FindByAttribute(ctx context.Context, class string, fields []string, key, value string) ([]any, error) {
	cli, err := GetClient()
	if err != nil {
		return nil, err
	}

	// Length 0 with capacity len(fields)+1: a non-zero length would leave that
	// many zero-value (unnamed) fields in front of the ones appended below.
	gqlFields := make([]graphql.Field, 0, len(fields)+1)
	for _, field := range fields {
		gqlFields = append(gqlFields, graphql.Field{Name: field})
	}
	gqlFields = append(gqlFields, graphql.Field{
		Name: "_additional",
		Fields: []graphql.Field{
			{Name: "vector"}, // always supported
		},
	})

	where := filters.Where().
		WithPath([]string{key}).
		WithOperator(filters.Equal).
		WithValueString(value)

	res, err := cli.GraphQL().Get().
		WithClassName(class).
		WithFields(gqlFields...).
		WithWhere(where).
		Do(ctx)
	if err != nil {
		return nil, err
	}

	// Walk Data["Get"][class] step by step, failing on the first unexpected shape.
	getRes, ok := res.Data["Get"]
	if !ok {
		return []any{}, nil
	}
	getMap, ok := getRes.(map[string]any)
	if !ok {
		return nil, errors.New("no get data found")
	}
	list, ok := getMap[class]
	if !ok {
		return nil, errors.New("data not found")
	}
	rows, ok := list.([]any)
	if !ok {
		return nil, errors.New("data not array list")
	}
	return rows, nil
}
最后,我想介绍我的新项目:gpt2api。
什么是GPT2API?
网站:https://aicanvas.app/gpt
GPT2API是一个平台,可帮助您构建API,让ChatGPT更易于使用。您可以构建API并与社区共享,也可以调用API Hub中由社区共享的其他API。
特征:
- 使用ChatGPT指令构建API。
- 在网站上测试API。
- 与社区分享。
- 让社区扩展API。
- 提供可用于您项目的示例代码。
- 调用ChatGPT的价格便宜,1美元可使用60万个token。
如果您对GPT2API或编程有任何疑问,则可以在Twitter上与我联系。欢迎您体验GPT2API。我希望得到您的评论。