跳到内容

Hologres

HologresVectorStore #

基类: BasePydanticVectorStore

Hologres 向量存储。

Hologres 是一站式实时数据仓库,支持高性能 OLAP 分析和高 QPS 在线服务。Hologres 支持向量处理,允许使用向量数据来表示非结构化数据的特征。详见:https://www.alibabacloud.com/help/en/hologres/user-guide/introduction-to-vector-processing

源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-hologres/llama_index/vector_stores/hologres/base.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
class HologresVectorStore(BasePydanticVectorStore):
    """
    Hologres Vector Store.

    Hologres is a one-stop real-time data warehouse, which can support high
    performance OLAP analysis and high QPS online services.
    Hologres supports vector processing and allows you to use vector data
    to show the characteristics of unstructured data.
    https://www.alibabacloud.com/help/en/hologres/user-guide/introduction-to-vector-processing

    Every operation is delegated to the wrapped ``HologresVector`` instance:
    ``add`` calls ``upsert_vectors``, ``query`` calls ``search`` and
    ``delete`` calls ``delete_vectors``.

    """

    # Hologres storage instance
    _storage: HologresVector = PrivateAttr()

    # Hologres vector db stores the document node's text as string.
    stores_text: bool = True

    def __init__(self, hologres_storage: HologresVector):
        """
        Construct from a Hologres storage instance.

        You can use from_connection_string or from_param instead.

        Args:
            hologres_storage: storage object every operation is delegated to

        """
        super().__init__()
        self._storage = hologres_storage

    @classmethod
    def from_connection_string(
        cls,
        connection_string: str,
        table_name: str,
        table_schema: Dict[str, str] = {"document": "text"},
        embedding_dimension: int = 1536,
        pre_delete_table: bool = False,
    ) -> "HologresVectorStore":
        """
        Create Hologres Vector Store from connection string.

        Args:
            connection_string: connection string of hologres database
            table_name: table name to persist data
            table_schema: table column schema
            embedding_dimension: dimension size of embedding vector
            pre_delete_table: whether to erase data from table on creation

        Returns:
            A store wrapping a newly constructed ``HologresVector``.

        """
        hologres_storage = HologresVector(
            connection_string,
            ndims=embedding_dimension,
            table_name=table_name,
            # The default for ``table_schema`` is a single dict object shared
            # by every call; hand the storage layer its own copy so that
            # nothing it keeps or changes can leak into later calls.
            table_schema=dict(table_schema),
            pre_delete_table=pre_delete_table,
        )
        return cls(hologres_storage=hologres_storage)

    @classmethod
    def from_param(
        cls,
        host: str,
        port: int,
        user: str,
        password: str,
        database: str,
        table_name: str,
        table_schema: Dict[str, str] = {"document": "text"},
        embedding_dimension: int = 1536,
        pre_delete_table: bool = False,
    ) -> "HologresVectorStore":
        """
        Create Hologres Vector Store from database configurations.

        Args:
            host: host
            port: port number
            user: hologres user
            password: hologres password
            database: hologres database
            table_name: hologres table name
            table_schema: table column schema
            embedding_dimension: dimension size of embedding vector
            pre_delete_table: whether to erase data from table on creation

        Returns:
            The store produced by ``from_connection_string``.

        """
        # The helper takes (host, port, database, user, password): note that
        # ``database`` comes before ``user`` there, unlike in this signature.
        connection_string = HologresVector.connection_string_from_db_params(
            host, port, database, user, password
        )
        return cls.from_connection_string(
            connection_string=connection_string,
            table_name=table_name,
            embedding_dimension=embedding_dimension,
            table_schema=table_schema,
            pre_delete_table=pre_delete_table,
        )

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this vector store class."""
        return "HologresVectorStore"

    @property
    def client(self) -> Any:
        """Return the underlying Hologres storage instance."""
        return self._storage

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """
        Add nodes to hologres index.

        Embedding data will be saved to `vector` column and text will be saved to `document` column.

        Args:
            nodes: list of nodes with embeddings
            **add_kwargs: accepted but not used by this implementation

        Returns:
            The ``node_id`` of every node, in input order.

        """
        embeddings = []
        node_ids = []
        schema_data_list = []
        meta_data_list = []

        for node in nodes:
            embeddings.append(node.get_embedding())
            node_ids.append(node.node_id)
            meta_data_list.append(node.metadata)
            # Only the raw content is stored as the document text; metadata
            # is passed separately through ``meta_data_list``.
            schema_data_list.append(
                {"document": node.get_content(metadata_mode=MetadataMode.NONE)}
            )

        self._storage.upsert_vectors(
            embeddings, node_ids, meta_data_list, schema_data_list
        )
        return node_ids

    def query(
        self,
        query: VectorStoreQuery,
        **kwargs: Any,
    ) -> VectorStoreQueryResult:
        """
        Query index for top k most similar nodes.

        Args:
            query: query object; its ``query_embedding``, ``similarity_top_k``
                and ``filters`` attributes are forwarded to the storage search
            **kwargs: accepted but not used by this implementation

        Returns:
            The matching nodes with their ids and similarity scores. When the
            search yields nothing, only empty ``similarities`` and ``ids``
            lists are set on the result.

        """
        # ``cast`` only informs the type checker; a missing embedding is
        # passed to the storage layer unchanged.
        query_embedding = cast(List[float], query.query_embedding)
        top_k = query.similarity_top_k

        query_results: List[Dict[str, Any]] = self._storage.search(
            query_embedding,
            k=top_k,
            select_columns=["document", "vector"],
            metadata_filters=query.filters,
        )

        # if empty, then return an empty response
        if not query_results:
            return VectorStoreQueryResult(similarities=[], ids=[])

        nodes = []
        similarities = []
        ids = []

        for result in query_results:
            node = TextNode(
                text=result["document"],
                id_=result["id"],
                embedding=result["vector"],
                metadata=result["metadata"],
            )
            nodes.append(node)
            ids.append(result["id"])
            # exp(-distance) decreases as the distance grows, so closer rows
            # get higher scores; for non-negative distances it is in (0, 1].
            similarities.append(math.exp(-result["distance"]))

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete nodes with the given ref_doc_id.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.
            **delete_kwargs: accepted but not used by this implementation

        """
        # NOTE(review): add() stores node.metadata unchanged, so this filter
        # presumably only matches rows whose metadata already carries a
        # "doc_id" key - confirm against how nodes are ingested.
        self._storage.delete_vectors(metadata_filters={"doc_id": ref_doc_id})

from_connection_string classmethod #

from_connection_string(connection_string: str, table_name: str, table_schema: Dict[str, str] = {'document': 'text'}, embedding_dimension: int = 1536, pre_delete_table: bool = False) -> HologresVectorStore

从连接字符串创建 Hologres 向量存储。

参数

名称 类型 描述 默认值
connection_string str

Hologres 数据库的连接字符串

必需
table_name str

用于持久化数据的表名

必需
table_schema Dict[str, str]

表列模式

{'document': 'text'}
embedding_dimension int

嵌入向量的维度大小

1536
pre_delete_table bool

创建时是否清除表中的数据

False
源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-hologres/llama_index/vector_stores/hologres/base.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
@classmethod
def from_connection_string(
    cls,
    connection_string: str,
    table_name: str,
    table_schema: Dict[str, str] = {"document": "text"},
    embedding_dimension: int = 1536,
    pre_delete_table: bool = False,
) -> "HologresVectorStore":
    """
    Create Hologres Vector Store from connection string.

    Args:
        connection_string: connection string of hologres database
        table_name: table name to persist data
        table_schema: table column schema
        embedding_dimension: dimension size of embedding vector
        pre_delete_table: whether to erase data from table on creation

    Returns:
        A store wrapping a newly constructed ``HologresVector``.

    """
    hologres_storage = HologresVector(
        connection_string,
        ndims=embedding_dimension,
        table_name=table_name,
        # The default for ``table_schema`` is a single dict object shared by
        # every call; hand the storage layer its own copy so that nothing it
        # keeps or changes can leak into later calls.
        table_schema=dict(table_schema),
        pre_delete_table=pre_delete_table,
    )
    return cls(hologres_storage=hologres_storage)

from_param classmethod #

from_param(host: str, port: int, user: str, password: str, database: str, table_name: str, table_schema: Dict[str, str] = {'document': 'text'}, embedding_dimension: int = 1536, pre_delete_table: bool = False) -> HologresVectorStore

从数据库配置创建 Hologres 向量存储。

参数

名称 类型 描述 默认值
host str

Hologres 主机地址

必需
port int

端口号

必需
user str

Hologres 用户

必需
password str

Hologres 密码

必需
database str

Hologres 数据库

必需
table_name str

Hologres 表名

必需
table_schema Dict[str, str]

表列模式

{'document': 'text'}
embedding_dimension int

嵌入向量的维度大小

1536
pre_delete_table bool

创建时是否清除表中的数据

False
源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-hologres/llama_index/vector_stores/hologres/base.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
@classmethod
def from_param(
    cls,
    host: str,
    port: int,
    user: str,
    password: str,
    database: str,
    table_name: str,
    table_schema: Dict[str, str] = {"document": "text"},
    embedding_dimension: int = 1536,
    pre_delete_table: bool = False,
) -> "HologresVectorStore":
    """
    Create Hologres Vector Store from database configurations.

    Args:
        host: host
        port: port number
        user: hologres user
        password: hologres password
        database: hologres database
        table_name: hologres table name
        table_schema: table column schema
        embedding_dimension: dimension size of embedding vector
        pre_delete_table: whether to erase data from table on creation

    Returns:
        The store produced by ``from_connection_string``.

    """
    # The helper takes (host, port, database, user, password): note that
    # ``database`` comes before ``user`` there, unlike in this signature.
    connection_string = HologresVector.connection_string_from_db_params(
        host, port, database, user, password
    )
    return cls.from_connection_string(
        connection_string=connection_string,
        table_name=table_name,
        embedding_dimension=embedding_dimension,
        # Forward a copy rather than the default dict object itself, which
        # is shared by every call that omits ``table_schema``.
        table_schema=dict(table_schema),
        pre_delete_table=pre_delete_table,
    )

add #

add(nodes: List[BaseNode], **add_kwargs: Any) -> List[str]

将节点添加到 Hologres 索引。

嵌入数据将保存到 vector 列,文本将保存到 document 列。

参数

名称 类型 描述 默认值
nodes List[BaseNode]

带有嵌入的节点列表

必需
源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-hologres/llama_index/vector_stores/hologres/base.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def add(
    self,
    nodes: List[BaseNode],
    **add_kwargs: Any,
) -> List[str]:
    """
    Upsert the given nodes into the Hologres table.

    Each node's embedding goes to the `vector` column and its content,
    rendered without metadata, goes to the `document` column. The node
    metadata is passed to the storage layer as a separate list.

    Args:
        nodes: nodes to store; ``get_embedding()`` is called on each one
        **add_kwargs: accepted but not used by this implementation

    Returns:
        The ``node_id`` of every node, in input order.

    """
    # One tuple per node; the fields are read in the order
    # embedding -> id -> metadata -> document.
    rows = [
        (
            item.get_embedding(),
            item.node_id,
            item.metadata,
            {"document": item.get_content(metadata_mode=MetadataMode.NONE)},
        )
        for item in nodes
    ]

    # Transpose the rows into four parallel lists. With no rows, zip()
    # yields nothing, so fall back to four empty lists.
    columns = [list(column) for column in zip(*rows)] or [[], [], [], []]
    vectors, ids, metadatas, documents = columns

    self._storage.upsert_vectors(vectors, ids, metadatas, documents)
    return ids

query #

query(query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult

查询索引以获取前 k 个最相似的节点。

参数

名称 类型 描述 默认值
query_embedding List[float]

查询嵌入

必需
similarity_top_k int

前 k 个最相似的节点

必需
源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-hologres/llama_index/vector_stores/hologres/base.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
def query(
    self,
    query: VectorStoreQuery,
    **kwargs: Any,
) -> VectorStoreQueryResult:
    """
    Search the Hologres table for the rows closest to the query embedding.

    Args:
        query: query object; its ``query_embedding``, ``similarity_top_k``
            and ``filters`` attributes are forwarded to the storage search
        **kwargs: accepted but not used by this implementation

    Returns:
        The matching nodes with their ids and similarity scores. When the
        search yields nothing, only empty ``similarities`` and ``ids`` lists
        are set on the result.

    """
    hits: List[dict[str, Any]] = self._storage.search(
        # ``cast`` only informs the type checker; the value is passed as-is.
        cast(List[float], query.query_embedding),
        k=query.similarity_top_k,
        select_columns=["document", "vector"],
        metadata_filters=query.filters,
    )

    # Nothing matched: return an empty response.
    if len(hits) == 0:
        return VectorStoreQueryResult(similarities=[], ids=[])

    found_nodes, scores, found_ids = [], [], []
    for hit in hits:
        found_nodes.append(
            TextNode(
                text=hit["document"],
                id_=hit["id"],
                embedding=hit["vector"],
                metadata=hit["metadata"],
            )
        )
        found_ids.append(hit["id"])
        # exp(-distance) decreases as the distance grows, so closer rows
        # receive higher scores.
        scores.append(math.exp(-hit["distance"]))

    return VectorStoreQueryResult(
        nodes=found_nodes, similarities=scores, ids=found_ids
    )

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

使用 ref_doc_id 删除节点。

参数

名称 类型 描述 默认值
ref_doc_id str

要删除的文档的 doc_id。

必需
源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-hologres/llama_index/vector_stores/hologres/base.py
200
201
202
203
204
205
206
207
208
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """
    Delete the nodes that belong to the given ref_doc_id.

    Args:
        ref_doc_id (str): The doc_id of the document to delete.
        **delete_kwargs: accepted but not used by this implementation

    """
    # The storage layer selects rows through a metadata filter on "doc_id".
    doc_filter = {"doc_id": ref_doc_id}
    self._storage.delete_vectors(metadata_filters=doc_filter)