如何在Golang程序中使用Weaviate存储OpenAI嵌入向量
#openai #go #vectordatabase #weaviate

最近,我想使用向量数据库存储OpenAI嵌入向量。
我研究了多种向量数据库,例如Pinecone,Redis,Qdrant,Milvus,Weaviate,Chroma,Pgvector。比较之后,我选择了Weaviate作为向量数据库。Weaviate是用Golang开发的,这使我更容易安装和测试。

如何在Debian 12上安装Weaviate

首先,下载Weaviate

转到github.com/weaviate/weaviate,然后找到适合您操作系统的版本。
我选择下载weaviate-v1.20.3-linux-amd64.tar.gz

mkdir vectordb
cd vectordb
wget https://github.com/weaviate/weaviate/releases/download/v1.20.3/weaviate-v1.20.3-linux-amd64.tar.gz

然后安装Weaviate

首先解压缩文件,然后添加配置文件。

tar -zxvf weaviate-v1.20.3-linux-amd64.tar.gz

您需要按如下方式配置身份验证和数据持久化所需的环境变量。这里我们使用API密钥身份验证进行测试。

export AUTHENTICATION_APIKEY_ENABLED=true
export AUTHENTICATION_APIKEY_ALLOWED_KEYS=q8Z5HN4U2w8jUDRGeGxyKkqvwjPg7w4P
export AUTHENTICATION_APIKEY_USERS=newbing@example.com
export PERSISTENCE_DATA_PATH=/home/newbing/vectordb/data

启动Weaviate

解压缩文件并设置环境变量后,您就可以启动Weaviate实例。

./weaviate --host=127.0.0.1 --port=8181 --scheme=http

使用Supervisor启动

如果您不想手动启动或停止,可以使用Supervisor来管理Weaviate实例。
Supervisor配置文件:

; Supervisor program definition that runs the Weaviate binary as a managed service.
[program:weaviate]
directory=/home/newbing/vectordb/
command=/home/newbing/vectordb/weaviate --host=127.0.0.1 --port=8181 --scheme=http
autostart=true
autorestart=true
; NOTE(review): the process runs as "www" while the binary and data path live under
; /home/newbing - confirm that this user can read and write those paths.
user=www
; The API key must be the same one exported for the manual start
; (AUTHENTICATION_APIKEY_ALLOWED_KEYS), otherwise clients configured with that key are rejected.
environment=HOME="/home/www",AUTHENTICATION_APIKEY_ENABLED="true",AUTHENTICATION_APIKEY_ALLOWED_KEYS="q8Z5HN4U2w8jUDRGeGxyKkqvwjPg7w4P",AUTHENTICATION_APIKEY_USERS="newbing@example.com",PERSISTENCE_DATA_PATH="/home/newbing/vectordb/data"
numprocs=1
; stderr is merged into stdout, so everything ends up in the log file below.
redirect_stderr=true
stdout_logfile=/var/log/supervisor/weaviate.log

在Golang中使用Weaviate

创建Weaviate客户端

package vector

import (
    "github.com/rs/zerolog/log"
    "github.com/spf13/viper"
    "github.com/weaviate/weaviate-go-client/v4/weaviate"
    "github.com/weaviate/weaviate-go-client/v4/weaviate/auth"
    "net/url"
)

var (
    _cli *weaviate.Client
)

// GetClient returns the package-level Weaviate client, creating it on first use.
//
// The endpoint is read from the viper key "weaviate.api" and parsed as a URL
// to obtain the scheme and host; the API key is read from "weaviate.key".
// A client that was created successfully is stored in _cli and returned
// directly on later calls. On failure the error is logged and returned
// together with a nil client.
//
// NOTE(review): access to _cli is not synchronized in this function.
func GetClient() (*weaviate.Client, error) {
    if cached := _cli; cached != nil {
        return cached, nil
    }

    endpoint, parseErr := url.Parse(viper.GetString("weaviate.api"))
    if parseErr != nil {
        log.Error().Err(parseErr).
            Str("func", "init").
            Str("package", "vector").
            Msg("parse api addr failed")
        return nil, parseErr
    }

    created, newErr := weaviate.NewClient(weaviate.Config{
        Host:       endpoint.Host,
        Scheme:     endpoint.Scheme,
        AuthConfig: auth.ApiKey{Value: viper.GetString("weaviate.key")},
    })
    if newErr != nil {
        log.Error().Err(newErr).
            Str("func", "init").
            Str("package", "vector").
            Msg("create client failed")
        // Leave the cache empty so that the next call retries.
        _cli = nil
        return nil, newErr
    }

    _cli = created
    return created, nil
}

将向量存储到Weaviate


// Create stores one object together with its vector in Weaviate.
//
// class is the class (schema/table) name, props holds the object's
// properties, and vector is the embedding to attach (e.g. from OpenAI).
// It returns the Object field of the client's result, or the error from
// obtaining the client or from the create request.
func Create(ctx context.Context, class string, props map[string]string, vector []float32) (*models.Object, error) {
    client, err := GetClient()
    if err != nil {
        return nil, err
    }

    request := client.Data().Creator().
        WithClassName(class).
        WithProperties(props).
        WithVector(vector)

    wrapper, err := request.Do(ctx)
    if err != nil {
        return nil, err
    }
    return wrapper.Object, nil
}

相似性搜索向量

// Near runs a nearVector similarity search against a Weaviate class.
//
// Parameters:
//   - class: the class (schema/table) name to query.
//   - fields: the properties to return for every hit.
//   - vector: the query embedding (e.g. from OpenAI) to compare against.
//   - maxDistance: the maximum distance passed to the nearVector argument;
//     smaller distances mean more similar results.
//   - limit: the maximum number of rows to return; 0 selects the default of 10.
//
// Besides the requested fields, every row also asks for _additional.certainty
// and _additional.distance.
//
// It returns the rows found under Data["Get"][class]. If the response has no
// "Get" entry, an empty, non-nil slice is returned. An error is returned when
// the client cannot be obtained, the request fails, or the response does not
// have the expected shape.
func Near(ctx context.Context, class string, fields []string, vector []float32, maxDistance float32, limit int) ([]any, error) {
    if limit == 0 {
        limit = 10
    }
    cli, err := GetClient()
    if err != nil {
        return nil, err
    }

    // Length 0 with capacity for every requested field plus "_additional".
    // Allocating with a non-zero length and then appending would leave that
    // many empty-name fields at the front of the query.
    gqlFields := make([]graphql.Field, 0, len(fields)+1)
    for _, field := range fields {
        gqlFields = append(gqlFields, graphql.Field{Name: field})
    }
    gqlFields = append(gqlFields, graphql.Field{
        Name: "_additional",
        Fields: []graphql.Field{
            {Name: "certainty"}, // only supported if distance==cosine
            {Name: "distance"},  // always supported
        },
    })

    nearVector := cli.GraphQL().NearVectorArgBuilder().
        WithVector(vector).
        WithDistance(maxDistance)

    res, err := cli.GraphQL().Get().
        WithClassName(class).
        WithFields(gqlFields...).
        WithNearVector(nearVector).
        WithLimit(limit).
        Do(ctx)
    if err != nil {
        return nil, err
    }

    // Walk Data["Get"][class] step by step, failing on the first unexpected shape.
    getRes, ok := res.Data["Get"]
    if !ok {
        return []any{}, nil
    }
    getMap, ok := getRes.(map[string]any)
    if !ok {
        return nil, errors.New("no get data found")
    }
    list, ok := getMap[class]
    if !ok {
        return nil, errors.New("data not found")
    }
    rows, ok := list.([]any)
    if !ok {
        return nil, errors.New("data not array list")
    }
    return rows, nil
}

通过属性查找向量

// FindByAttribute returns the objects of a class whose property equals a value.
//
// Parameters:
//   - class: the class (schema/table) name to query.
//   - fields: the properties to return for every hit.
//   - key: the property name to filter on.
//   - value: the string value the property must equal.
//
// Besides the requested fields, every row also asks for _additional.vector.
//
// It returns the rows found under Data["Get"][class]. If the response has no
// "Get" entry, an empty, non-nil slice is returned. An error (with a nil
// slice) is returned when the client cannot be obtained, the request fails,
// or the response does not have the expected shape.
func FindByAttribute(ctx context.Context, class string, fields []string, key, value string) ([]any, error) {
    cli, err := GetClient()
    if err != nil {
        return nil, err
    }

    // Length 0 with capacity for every requested field plus "_additional".
    // Allocating with a non-zero length and then appending would leave that
    // many empty-name fields at the front of the query.
    gqlFields := make([]graphql.Field, 0, len(fields)+1)
    for _, field := range fields {
        gqlFields = append(gqlFields, graphql.Field{Name: field})
    }
    gqlFields = append(gqlFields, graphql.Field{
        Name: "_additional",
        Fields: []graphql.Field{
            {Name: "vector"}, // always supported
        },
    })

    where := filters.Where().
        WithPath([]string{key}).
        WithOperator(filters.Equal).
        WithValueString(value)

    res, err := cli.GraphQL().Get().
        WithClassName(class).
        WithFields(gqlFields...).
        WithWhere(where).
        Do(ctx)
    if err != nil {
        return nil, err
    }

    // Walk Data["Get"][class] step by step, failing on the first unexpected shape.
    getRes, ok := res.Data["Get"]
    if !ok {
        return []any{}, nil
    }
    getMap, ok := getRes.(map[string]any)
    if !ok {
        return nil, errors.New("no get data found")
    }
    list, ok := getMap[class]
    if !ok {
        return nil, errors.New("data not found")
    }
    rows, ok := list.([]any)
    if !ok {
        return nil, errors.New("data not array list")
    }
    return rows, nil
}

Weaviate存储、搜索、查找的完整代码

package vector

import (
    "context"
    "errors"
    "github.com/weaviate/weaviate-go-client/v4/weaviate/filters"
    "github.com/weaviate/weaviate-go-client/v4/weaviate/graphql"
    "github.com/weaviate/weaviate/entities/models"
)

// Create stores one object together with its vector in Weaviate.
//
// class is the class (schema/table) name, props holds the object's
// properties, and vector is the embedding to attach (e.g. from OpenAI).
// It returns the Object field of the client's result, or the error from
// obtaining the client or from the create request.
func Create(ctx context.Context, class string, props map[string]string, vector []float32) (*models.Object, error) {
    client, err := GetClient()
    if err != nil {
        return nil, err
    }

    request := client.Data().Creator().
        WithClassName(class).
        WithProperties(props).
        WithVector(vector)

    wrapper, err := request.Do(ctx)
    if err != nil {
        return nil, err
    }
    return wrapper.Object, nil
}

// Near runs a nearVector similarity search against a Weaviate class.
//
// Parameters:
//   - class: the class (schema/table) name to query.
//   - fields: the properties to return for every hit.
//   - vector: the query embedding (e.g. from OpenAI) to compare against.
//   - maxDistance: the maximum distance passed to the nearVector argument;
//     smaller distances mean more similar results.
//   - limit: the maximum number of rows to return; 0 selects the default of 10.
//
// Besides the requested fields, every row also asks for _additional.certainty
// and _additional.distance.
//
// It returns the rows found under Data["Get"][class]. If the response has no
// "Get" entry, an empty, non-nil slice is returned. An error is returned when
// the client cannot be obtained, the request fails, or the response does not
// have the expected shape.
func Near(ctx context.Context, class string, fields []string, vector []float32, maxDistance float32, limit int) ([]any, error) {
    if limit == 0 {
        limit = 10
    }
    cli, err := GetClient()
    if err != nil {
        return nil, err
    }

    // Length 0 with capacity for every requested field plus "_additional".
    // Allocating with a non-zero length and then appending would leave that
    // many empty-name fields at the front of the query.
    gqlFields := make([]graphql.Field, 0, len(fields)+1)
    for _, field := range fields {
        gqlFields = append(gqlFields, graphql.Field{Name: field})
    }
    gqlFields = append(gqlFields, graphql.Field{
        Name: "_additional",
        Fields: []graphql.Field{
            {Name: "certainty"}, // only supported if distance==cosine
            {Name: "distance"},  // always supported
        },
    })

    nearVector := cli.GraphQL().NearVectorArgBuilder().
        WithVector(vector).
        WithDistance(maxDistance)

    res, err := cli.GraphQL().Get().
        WithClassName(class).
        WithFields(gqlFields...).
        WithNearVector(nearVector).
        WithLimit(limit).
        Do(ctx)
    if err != nil {
        return nil, err
    }

    // Walk Data["Get"][class] step by step, failing on the first unexpected shape.
    getRes, ok := res.Data["Get"]
    if !ok {
        return []any{}, nil
    }
    getMap, ok := getRes.(map[string]any)
    if !ok {
        return nil, errors.New("no get data found")
    }
    list, ok := getMap[class]
    if !ok {
        return nil, errors.New("data not found")
    }
    rows, ok := list.([]any)
    if !ok {
        return nil, errors.New("data not array list")
    }
    return rows, nil
}

// FindByAttribute returns the objects of a class whose property equals a value.
//
// Parameters:
//   - class: the class (schema/table) name to query.
//   - fields: the properties to return for every hit.
//   - key: the property name to filter on.
//   - value: the string value the property must equal.
//
// Besides the requested fields, every row also asks for _additional.vector.
//
// It returns the rows found under Data["Get"][class]. If the response has no
// "Get" entry, an empty, non-nil slice is returned. An error (with a nil
// slice) is returned when the client cannot be obtained, the request fails,
// or the response does not have the expected shape.
func FindByAttribute(ctx context.Context, class string, fields []string, key, value string) ([]any, error) {
    cli, err := GetClient()
    if err != nil {
        return nil, err
    }

    // Length 0 with capacity for every requested field plus "_additional".
    // Allocating with a non-zero length and then appending would leave that
    // many empty-name fields at the front of the query.
    gqlFields := make([]graphql.Field, 0, len(fields)+1)
    for _, field := range fields {
        gqlFields = append(gqlFields, graphql.Field{Name: field})
    }
    gqlFields = append(gqlFields, graphql.Field{
        Name: "_additional",
        Fields: []graphql.Field{
            {Name: "vector"}, // always supported
        },
    })

    where := filters.Where().
        WithPath([]string{key}).
        WithOperator(filters.Equal).
        WithValueString(value)

    res, err := cli.GraphQL().Get().
        WithClassName(class).
        WithFields(gqlFields...).
        WithWhere(where).
        Do(ctx)
    if err != nil {
        return nil, err
    }

    // Walk Data["Get"][class] step by step, failing on the first unexpected shape.
    getRes, ok := res.Data["Get"]
    if !ok {
        return []any{}, nil
    }
    getMap, ok := getRes.(map[string]any)
    if !ok {
        return nil, errors.New("no get data found")
    }
    list, ok := getMap[class]
    if !ok {
        return nil, errors.New("data not found")
    }
    rows, ok := list.([]any)
    if !ok {
        return nil, errors.New("data not array list")
    }
    return rows, nil
}

最后,我想介绍我的新项目:gpt2api。

什么是GPT2API?

网站:https://aicanvas.app/gpt
GPT2API是一个平台,可帮助您构建API,让ChatGPT更易于使用。您可以构建API并与社区共享,也可以调用API.Hub中由社区共享的其他API。

特征:

  1. 使用ChatGPT指令构建API。
  2. 在网站上测试API。
  3. 与社区分享。
  4. 让社区扩展API。
  5. 为您的项目提供示例代码。
  6. 调用ChatGPT的价格低廉,1美元可获得60万个token。

如果您对GPT2API或编程有任何疑问,则可以在Twitter上与我联系。欢迎您体验GPT2API。我希望得到您的评论。