跳到内容

Jaguar

JaguarVectorStore #

基类:BasePydanticVectorStore

Jaguar 向量存储。

参见 http://www.jaguardb.com 和 http://github.com/fserv/jaguar-sdk

示例

pip install llama-index-vector-stores-jaguar

from llama_index.vector_stores.jaguar import JaguarVectorStore

# One JaguarVectorStore instance addresses one store inside one pod.
vectorstore = JaguarVectorStore(
    pod="vdb",  # name of the pod (database)
    store="mystore",  # name of the vector store in the pod
    vector_index="v",  # name of the vector index of the store
    vector_type="cosine_fraction_float",  # type of the vector index
    vector_dimension=1536,  # dimension of the vector index
    url="http://192.168.8.88:8080/fwww/",  # URL end point of the jaguar http server
)
源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
class JaguarVectorStore(BasePydanticVectorStore):
    """
    Jaguar vector store.

    Builds Jaguar query statements as strings and sends them through a
    ``JaguarHttpClient``.  ``login()`` must succeed before anything else is
    called: ``run()`` refuses to contact the server while the session token
    is empty and returns ``{}`` instead.

    See http://www.jaguardb.com
    See http://github.com/fserv/jaguar-sdk

    Examples:
        `pip install llama-index-vector-stores-jaguar`

        ```python
        from llama_index.vector_stores.jaguar import JaguarVectorStore
        vectorstore = JaguarVectorStore(
            pod = 'vdb',
            store = 'mystore',
            vector_index = 'v',
            vector_type = 'cosine_fraction_float',
            vector_dimension = 1536,
            url='http://192.168.8.88:8080/fwww/',
        )
        ```

    """

    stores_text: bool = True

    # Identifiers below are sanitized once in __init__ and then spliced into
    # query strings verbatim.
    _pod: str = PrivateAttr()
    _store: str = PrivateAttr()
    _vector_index: str = PrivateAttr()
    _vector_type: str = PrivateAttr()
    _vector_dimension: int = PrivateAttr()
    _jag: JaguarHttpClient = PrivateAttr()
    # Session token; the empty string means "not logged in".
    _token: str = PrivateAttr()

    def __init__(
        self,
        pod: str,
        store: str,
        vector_index: str,
        vector_type: str,
        vector_dimension: int,
        url: str,
    ):
        """
        Constructor of JaguarVectorStore.

        No network traffic happens here; the store starts logged out.

        Args:
            pod: str:  name of the pod (database)
            store: str:  name of vector store in the pod
            vector_index: str:  name of vector index of the store
            vector_type: str:  type of the vector index
            vector_dimension: int:  dimension of the vector index
            url: str:  URL end point of jaguar http server

        """
        super().__init__(stores_text=True)
        self._pod = self._sanitize_input(pod)
        self._store = self._sanitize_input(store)
        self._vector_index = self._sanitize_input(vector_index)
        self._vector_type = self._sanitize_input(vector_type)
        self._vector_dimension = vector_dimension
        self._jag = JaguarHttpClient(url)
        self._token = ""

    def __del__(self) -> None:
        # Intentionally a no-op: the session is only released by logout().
        pass

    @classmethod
    def class_name(cls) -> str:
        """Return the class name used to identify this vector store."""
        return "JaguarVectorStore"

    @property
    def client(self) -> Any:
        """Get client."""
        return self._jag

    def _sanitize_input(self, value: str) -> str:
        """
        Sanitize input to prevent SQL injection.

        Single quotes are backslash-escaped; double quotes, semicolons and
        the comment markers ``--``, ``/*`` and ``*/`` are removed.

        Args:
            value: raw text that will be spliced into a query statement
        Returns:
            the sanitized text

        """
        forbidden_tokens = ('"', ";", "--", "/*", "*/")
        sanitized = value.replace("'", "\\'")
        # Removing one token can splice its neighbours into another forbidden
        # token (e.g. "-/*-" becomes "--"), so repeat until a full pass leaves
        # the text unchanged.  Every change shortens the text, so this ends.
        while True:
            before = sanitized
            for token in forbidden_tokens:
                sanitized = sanitized.replace(token, "")
            if sanitized == before:
                return sanitized

    def add(
        self,
        nodes: Sequence[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """
        Add nodes to index.

        Args:
            nodes: List[BaseNode]: list of nodes with embeddings
            add_kwargs: forwarded unchanged to ``add_text``.  Node metadata
                is stored only when ``use_node_metadata`` is exactly ``True``.
        Returns:
            one id per node, in input order, as returned by ``add_text``

        """
        attach_metadata = add_kwargs.get("use_node_metadata", False) is True
        return [
            self.add_text(
                node.get_text(),
                node.get_embedding(),
                node.metadata if attach_metadata else None,
                **add_kwargs,
            )
            for node in nodes
        ]

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete nodes using with ref_doc_id.

        Args:
            ref_doc_id (str): The doc_id of the document to delete; it is
                matched against the ``zid`` column.

        """
        safe_id = self._sanitize_input(ref_doc_id)
        self.run(f"delete from {self._pod}.{self._store} where zid='{safe_id}'")

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """
        Query index for top k most similar nodes.

        Args:
            query: VectorStoreQuery object
            kwargs:  may contain 'where', 'metadata_fields', 'args', 'fetch_k'
        Returns:
            VectorStoreQueryResult holding the nodes, their ids and scores

        """
        nodes, ids, scores = self.similarity_search_with_score(
            query.query_embedding,
            k=query.similarity_top_k,
            form="node",
            **kwargs,
        )
        return VectorStoreQueryResult(nodes=nodes, ids=ids, similarities=scores)

    def load_documents(
        self, embedding: List[float], k: int, **kwargs: Any
    ) -> List[Document]:
        """
        Query index to load top k most similar documents.

        Args:
            embedding: a list of floats
            k: topK number
            kwargs:  may contain 'where', 'metadata_fields', 'args', 'fetch_k'
        Returns:
            list of Document objects (empty when nothing matches)

        """
        documents = self.similarity_search_with_score(
            embedding, k=k, form="doc", **kwargs
        )
        return cast(List[Document], documents)

    def create(
        self,
        metadata_fields: str,
        text_size: int,
    ) -> None:
        """
        Create the vector store on the backend database.

        Args:
            metadata_fields (str):  extra metadata columns and types; may be
                empty when the store needs no extra columns
            text_size (int):  size used for the ``v:text char(...)`` column
        Returns:
            None.  The server reply is not inspected.

        """
        # The v:text column is required.
        q = (
            f"create store {self._pod}.{self._store}"
            f" ({self._vector_index} vector({self._vector_dimension},"
            f" '{self._vector_type}'),"
            f"  v:text char({text_size})"
        )
        extra_columns = self._sanitize_input(metadata_fields) if metadata_fields else ""
        # Only add the separator when there is something to separate;
        # otherwise the statement would end in ",)".
        if extra_columns.strip():
            q += "," + extra_columns
        q += ")"
        self.run(q)

    def add_text(
        self,
        text: str,
        embedding: List[float],
        metadata: Optional[dict] = None,
        **kwargs: Any,
    ) -> str:
        """
        Insert one text with its embedding (and optional metadata).

        Args:
          text: text string to add to the jaguar vector store.
          embedding: embedding vector of the text, list of floats
          metadata: column name -> value mapping, for example
                    {'file_path': '../data/paul_graham/paul_graham_essay.txt',
                     'file_name': 'paul_graham_essay.txt',
                     'file_type': 'text/plain',
                     'file_size': 75042,
                     'creation_date': '2023-12-24',
                     'last_modified_date': '2023-12-24',
                     'last_accessed_date': '2023-12-28'}
                    Values are converted with ``str()`` before insertion.
          kwargs: file_column=name of the metadata key holding a file path
                      to upload before the insert
                  text_tag=prefix placed in front of the text

        Returns:
            id from adding the text into the vectorstore, or "" when the
            text column lookup, the file upload or the insert fails

        """
        vcol = self._vector_index
        filecol = kwargs.get("file_column", "")
        text_tag = kwargs.get("text_tag", "")

        if text_tag != "":
            text = text_tag + " " + text
        # Sanitize after tagging so the caller-supplied tag is covered too.
        text = self._sanitize_input(text)

        podstore = self._pod + "." + self._store
        js = self.run("textcol " + podstore + "." + vcol)
        # run() yields {} when not logged in or when the reply is not JSON.
        if not isinstance(js, dict) or "data" not in js:
            return ""
        textcol = js["data"]

        names = []
        values = []
        with_file = False
        if metadata is not None:
            names, values, filepath = self._parseMeta(metadata, filecol)
            if filecol != "":
                if not self._jag.postFile(self._token, filepath, 1):
                    return ""
                with_file = True

        # Sanitize every identifier and value on its own and add the quotes
        # afterwards: sanitizing the assembled list would escape the very
        # quotes that delimit the values.
        columns = [self._sanitize_input(str(name)) for name in names]
        columns += [vcol, textcol]
        literals = [self._sanitize_input(str(value)) for value in values]
        literals.append(self._sanitize_input(",".join(str(x) for x in embedding)))
        literals.append(text)

        q = "insert into " + podstore + " (" + ",".join(columns)
        q += ") values ('" + "','".join(literals) + "')"
        js = self.run(q, with_file)
        if not isinstance(js, dict):
            return ""
        return js.get("zid", "")

    def similarity_search_with_score(
        self,
        embedding: Optional[List[float]],
        k: int = 3,
        form: str = "node",
        **kwargs: Any,
    ) -> Union[Tuple[List[TextNode], List[str], List[float]], List[Document]]:
        """
        Return nodes most similar to query embedding, along with ids and scores.

        Args:
            embedding: embedding of text to look up.
            k: Number of nodes to return. Defaults to 3.
            form: if "node", return Tuple[List[TextNode], List[str], List[float]]
                  otherwise return List[Document]
            kwargs: may have where, metadata_fields, args, fetch_k
        Returns:
            Tuple(list of nodes, list of ids, list of similarity scores) for
            form "node"; list of documents otherwise.  The result is empty,
            in the matching shape, when ``embedding`` is None or the server
            reply is not a list.

        """
        where = kwargs.get("where")
        metadata_fields = kwargs.get("metadata_fields")
        args = kwargs.get("args")
        fetch_k = kwargs.get("fetch_k", -1)
        as_nodes = form == "node"

        if embedding is None:
            return ([], [], []) if as_nodes else []

        qv_comma = self._sanitize_input(",".join(str(f) for f in embedding))
        options = [
            "topk=" + str(k),
            "fetch_k=" + str(fetch_k),
            "type=" + self._vector_type,
            "with_score=yes",
            "with_text=yes",
        ]
        # args and metadata_fields land inside the quoted option string, so
        # they get the same sanitizing as the where clause.
        if args is not None:
            options.append(self._sanitize_input(args))
        if metadata_fields is not None:
            safe_fields = [self._sanitize_input(m) for m in metadata_fields]
            options.append("metadata=" + "&".join(safe_fields))

        q = "select similarity(" + self._vector_index + ",'" + qv_comma
        q += "','" + ",".join(options)
        q += "') from " + self._pod + "." + self._store
        if where is not None:
            q += " where " + self._sanitize_input(where)

        jarr = self.run(q)
        # run() returns {} (not None) on failure; only a list carries rows.
        if not isinstance(jarr, list):
            return ([], [], []) if as_nodes else []

        nodes = []
        ids = []
        simscores = []
        docs = []
        for row in jarr:
            zid = row["zid"]
            md = {"zid": zid}
            if metadata_fields is not None:
                for m in metadata_fields:
                    md[m] = row[m]

            if as_nodes:
                nodes.append(TextNode(id_=zid, text=row["text"], metadata=md))
                ids.append(zid)
                simscores.append(float(row["score"]))
            else:
                docs.append(Document(id_=zid, text=row["text"], metadata=md))

        if as_nodes:
            return (nodes, ids, simscores)
        return docs

    def is_anomalous(
        self,
        node: BaseNode,
        **kwargs: Any,
    ) -> bool:
        """
        Detect if given text is anomalous from the dataset.

        Args:
            node: node whose embedding is checked against the store
        Returns:
            True when the server answers "YES"; False otherwise, including
            when the reply is empty or is not a list

        """
        qv_comma = self._sanitize_input(
            ",".join(str(f) for f in node.get_embedding())
        )
        q = "select anomalous(" + self._vector_index + ", '" + qv_comma
        q += "', 'type=" + self._vector_type + "')"
        q += " from " + self._pod + "." + self._store

        js = self.run(q)
        # run() returns {} on failure, which has no element 0 to decode.
        if not isinstance(js, list) or not js:
            return False
        jd = json.loads(js[0])
        return jd["anomalous"] == "YES"

    def run(self, query: str, withFile: bool = False) -> dict:
        """
        Run any query statement in jaguardb.

        Args:
            query (str): query statement to jaguardb
            withFile (bool): passed through to the HTTP client's ``post``
        Returns:
            the decoded JSON reply (callers in this class also handle list
            replies), or ``{}`` when not logged in or when the reply cannot
            be decoded as JSON

        """
        token = self._token
        if token == "":
            logger.error(f"E0005 error run({query})")
            return {}

        payload = self._jag.post(query, token, withFile).text
        try:
            parsed = json.loads(payload)
        except Exception:
            parsed = {}
        return parsed

    def count(self) -> int:
        """
        Count records of a store in jaguardb.

        Args: no args
        Returns: (int) number of records in pod store; 0 when the reply is
            empty or is not a list
        """
        js = self.run("select count() from " + self._pod + "." + self._store)
        # run() returns {} on failure, which has no element 0 to decode.
        if not isinstance(js, list) or not js:
            return 0
        return int(json.loads(js[0])["data"])

    def clear(self) -> None:
        """
        Delete all records in jaguardb.

        Args: No args
        Returns: None
        """
        self.run(f"truncate store {self._pod}.{self._store}")

    def drop(self) -> None:
        """
        Drop or remove a store in jaguardb.

        Args: no args
        Returns: None
        """
        self.run(f"drop store {self._pod}.{self._store}")

    def prt(self, msg: str) -> None:
        """Append a timestamped debug line to /tmp/debugjaguar.log."""
        nows = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        with open("/tmp/debugjaguar.log", "a") as file:
            print(f"{nows} msg={msg}", file=file, flush=True)

    def login(
        self,
        jaguar_api_key: Optional[str] = "",
    ) -> bool:
        """
        Login to jaguar server with a jaguar_api_key or let self._jag find a key.

        Args:
            jaguar_api_key (str, optional): API key of user to jaguardb
                server; when empty or None the HTTP client's ``getApiKey()``
                supplies one
        Returns:
            True if successful; False if not successful

        """
        # None is allowed by the signature, so treat it like "".
        if not jaguar_api_key:
            jaguar_api_key = self._jag.getApiKey()
        self._jaguar_api_key = jaguar_api_key
        # Normalize a falsy token to "" so run() keeps refusing to post.
        self._token = self._jag.login(jaguar_api_key) or ""
        if self._token == "":
            logger.error("E0001 error init(): invalid jaguar_api_key")
            return False
        return True

    def logout(self) -> None:
        """
        Logout to cleanup resources.

        Args: no args
        Returns: None
        """
        session_token = self._token
        self._jag.logout(session_token)

    def _parseMeta(self, nvmap: dict, filecol: str) -> Tuple[List[str], List[str], str]:
        """
        Split a metadata mapping into parallel name and value lists.

        Args:
            nvmap: metadata mapping of column name -> value
            filecol: name of the file column, or "" when there is none
        Returns:
            (names, values, filepath).  With a file column, its entry comes
            first and its value is also returned as ``filepath``;
            ``filepath`` is "" when there is no file column or the key is
            missing from ``nvmap``.

        """
        if filecol == "":
            return list(nvmap.keys()), list(nvmap.values()), ""

        names = []
        values = []
        filepath = ""
        if filecol in nvmap:
            filepath = nvmap[filecol]
            names.append(filecol)
            values.append(filepath)

        for key, value in nvmap.items():
            if key != filecol:
                names.append(key)
                values.append(value)

        return names, values, filepath

client property #

client: Any

获取客户端。

add #

add(nodes: Sequence[BaseNode], **add_kwargs: Any) -> List[str]

将节点添加到索引。

参数

名称 类型 描述 默认值
nodes Sequence[BaseNode]

List[BaseNode]:带有嵌入的节点列表

必需
源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def add(
    self,
    nodes: Sequence[BaseNode],
    **add_kwargs: Any,
) -> List[str]:
    """
    Store every node through ``add_text`` and collect the resulting ids.

    Args:
        nodes: List[BaseNode]: list of nodes with embeddings
        add_kwargs: forwarded unchanged to ``add_text``.  Node metadata is
            stored only when ``use_node_metadata`` is exactly ``True``.
    Returns:
        one id per node, in input order

    """
    attach_metadata = add_kwargs.get("use_node_metadata", False) is True
    return [
        self.add_text(
            node.get_text(),
            node.get_embedding(),
            node.metadata if attach_metadata else None,
            **add_kwargs,
        )
        for node in nodes
    ]

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

使用 ref_doc_id 删除节点。

参数

名称 类型 描述 默认值
ref_doc_id str

要删除的文档的 doc_id。

必需
源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """
    Remove the records whose ``zid`` equals the given document id.

    Args:
        ref_doc_id (str): The doc_id of the document to delete; it is
            sanitized before being placed in the statement.

    """
    safe_id = self._sanitize_input(ref_doc_id)
    self.run(f"delete from {self._pod}.{self._store} where zid='{safe_id}'")

query #

query(query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult

查询索引以获取前 k 个最相似的节点。

参数

名称 类型 描述 默认值
query VectorStoreQuery

VectorStoreQuery 对象

必需
kwargs Any

可能包含 'where', 'metadata_fields', 'args', 'fetch_k'

{}
源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """
    Run a top-k similarity search for the embedding carried by ``query``.

    Args:
        query: VectorStoreQuery object; its ``query_embedding`` and
            ``similarity_top_k`` are used
        kwargs:  may contain 'where', 'metadata_fields', 'args', 'fetch_k'
    Returns:
        VectorStoreQueryResult holding the nodes, their ids and scores

    """
    nodes, ids, scores = self.similarity_search_with_score(
        query.query_embedding,
        k=query.similarity_top_k,
        form="node",
        **kwargs,
    )
    return VectorStoreQueryResult(nodes=nodes, ids=ids, similarities=scores)

load_documents #

load_documents(embedding: List[float], k: int, **kwargs: Any) -> List[Document]

查询索引以加载前 k 个最相似的文档。

参数

名称 类型 描述 默认值
embedding List[float]

浮点数列表

必需
k int

topK 数量

必需
kwargs Any

可能包含 'where', 'metadata_fields', 'args', 'fetch_k'

{}
源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def load_documents(
    self, embedding: List[float], k: int, **kwargs: Any
) -> List[Document]:
    """
    Fetch the top k most similar entries as documents.

    Args:
        embedding: a list of floats
        k: topK number
        kwargs:  may contain 'where', 'metadata_fields', 'args', 'fetch_k'
    Returns:
        whatever ``similarity_search_with_score`` returns for form "doc",
        typed as a list of Document

    """
    documents = self.similarity_search_with_score(
        embedding, k=k, form="doc", **kwargs
    )
    return cast(List[Document], documents)

create #

create(metadata_fields: str, text_size: int) -> None

在后端数据库上创建向量存储。

参数

名称 类型 描述 默认值
metadata_fields str

额外的元数据列和类型

必需
text_size int

v:text 文本列的字符长度(用于 char(...) 定义)

必需

返回值:None。该方法只向后端发送建库语句,不返回成功或失败标志。

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
def create(
    self,
    metadata_fields: str,
    text_size: int,
) -> None:
    """
    Create the vector store on the backend database.

    Args:
        metadata_fields (str):  extra metadata columns and types; may be
            empty when the store needs no extra columns
        text_size (int):  size used for the ``v:text char(...)`` column
    Returns:
        None.  The server reply is not inspected.

    """
    # The v:text column is required.
    q = (
        f"create store {self._pod}.{self._store}"
        f" ({self._vector_index} vector({self._vector_dimension},"
        f" '{self._vector_type}'),"
        f"  v:text char({text_size})"
    )
    extra_columns = self._sanitize_input(metadata_fields) if metadata_fields else ""
    # Only add the separator when there is something to separate; otherwise
    # the statement would end in ",)".
    if extra_columns.strip():
        q += "," + extra_columns
    q += ")"
    self.run(q)

add_text #

add_text(text: str, embedding: List[float], metadata: Optional[dict] = None, **kwargs: Any) -> str

通过嵌入添加文本并添加到向量存储。

参数

名称 类型 描述 默认值
text str

要添加到 jaguar 向量存储的文本字符串。

必需
embedding List[float]

文本的嵌入向量,浮点数列表

必需
metadata Optional[dict]

{'file_path': '../data/paul_graham/paul_graham_essay.txt', 'file_name': 'paul_graham_essay.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2023-12-24', 'last_modified_date': '2023-12-24', 'last_accessed_date': '2023-12-28'}

kwargs Any

file_column=文件列的名称;text_tag=加在文本前面的标签(代码实际只读取这两个键,不会读取 vector_index 或 metadata 键)

{}

返回值

类型 描述
str

将文本添加到向量存储后返回的 id

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
def add_text(
    self,
    text: str,
    embedding: List[float],
    metadata: Optional[dict] = None,
    **kwargs: Any,
) -> str:
    """
    Insert one text with its embedding (and optional metadata).

    Args:
      text: text string to add to the jaguar vector store.
      embedding: embedding vector of the text, list of floats
      metadata: column name -> value mapping, for example
                {'file_path': '../data/paul_graham/paul_graham_essay.txt',
                 'file_name': 'paul_graham_essay.txt',
                 'file_type': 'text/plain',
                 'file_size': 75042,
                 'creation_date': '2023-12-24',
                 'last_modified_date': '2023-12-24',
                 'last_accessed_date': '2023-12-28'}
                Values are converted with ``str()`` before insertion.
      kwargs: file_column=name of the metadata key holding a file path to
                  upload before the insert
              text_tag=prefix placed in front of the text

    Returns:
        id from adding the text into the vectorstore, or "" when the text
        column lookup, the file upload or the insert fails

    """
    vcol = self._vector_index
    filecol = kwargs.get("file_column", "")
    text_tag = kwargs.get("text_tag", "")

    if text_tag != "":
        text = text_tag + " " + text
    # Sanitize after tagging so the caller-supplied tag is covered too.
    text = self._sanitize_input(text)

    podstore = self._pod + "." + self._store
    js = self.run("textcol " + podstore + "." + vcol)
    # run() yields {} when not logged in or when the reply is not JSON.
    if not isinstance(js, dict) or "data" not in js:
        return ""
    textcol = js["data"]

    names = []
    values = []
    with_file = False
    if metadata is not None:
        names, values, filepath = self._parseMeta(metadata, filecol)
        if filecol != "":
            if not self._jag.postFile(self._token, filepath, 1):
                return ""
            with_file = True

    # Sanitize every identifier and value on its own and add the quotes
    # afterwards: sanitizing the assembled list would escape the very quotes
    # that delimit the values.
    columns = [self._sanitize_input(str(name)) for name in names]
    columns += [vcol, textcol]
    literals = [self._sanitize_input(str(value)) for value in values]
    literals.append(self._sanitize_input(",".join(str(x) for x in embedding)))
    literals.append(text)

    q = "insert into " + podstore + " (" + ",".join(columns)
    q += ") values ('" + "','".join(literals) + "')"
    js = self.run(q, with_file)
    if not isinstance(js, dict):
        return ""
    return js.get("zid", "")

similarity_search_with_score #

similarity_search_with_score(embedding: Optional[List[float]], k: int = 3, form: str = 'node', **kwargs: Any) -> Union[Tuple[List[TextNode], List[str], List[float]], List[Document]]

返回与查询嵌入最相似的节点,以及它们的 id 和分数。

参数

名称 类型 描述 默认值
embedding Optional[List[float]]

要查找文本的嵌入。

必需
k int

要返回的节点数量。默认为 3。

3
form str

如果是 "node",返回 Tuple[List[TextNode], List[str], List[float]];如果是 "doc",返回 List[Document]

'node'
kwargs Any

可能包含 where, metadata_fields, args, fetch_k

{}

返回值:form 为 "node" 时返回 Tuple (节点列表, id 列表, 相似度分数列表);form 为 "doc" 时返回文档列表

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
def similarity_search_with_score(
    self,
    embedding: Optional[List[float]],
    k: int = 3,
    form: str = "node",
    **kwargs: Any,
) -> Union[Tuple[List[TextNode], List[str], List[float]], List[Document]]:
    """
    Return nodes most similar to query embedding, along with ids and scores.

    Args:
        embedding: embedding of text to look up.
        k: Number of nodes to return. Defaults to 3.
        form: if "node", return Tuple[List[TextNode], List[str], List[float]]
              otherwise return List[Document]
        kwargs: may have where, metadata_fields, args, fetch_k
    Returns:
        Tuple(list of nodes, list of ids, list of similarity scores) for
        form "node"; list of documents otherwise.  The result is empty, in
        the matching shape, when ``embedding`` is None or the server reply
        is not a list.

    """
    where = kwargs.get("where")
    metadata_fields = kwargs.get("metadata_fields")
    args = kwargs.get("args")
    fetch_k = kwargs.get("fetch_k", -1)
    as_nodes = form == "node"

    if embedding is None:
        return ([], [], []) if as_nodes else []

    qv_comma = self._sanitize_input(",".join(str(f) for f in embedding))
    options = [
        "topk=" + str(k),
        "fetch_k=" + str(fetch_k),
        "type=" + self._vector_type,
        "with_score=yes",
        "with_text=yes",
    ]
    # args and metadata_fields land inside the quoted option string, so they
    # get the same sanitizing as the where clause.
    if args is not None:
        options.append(self._sanitize_input(args))
    if metadata_fields is not None:
        safe_fields = [self._sanitize_input(m) for m in metadata_fields]
        options.append("metadata=" + "&".join(safe_fields))

    q = "select similarity(" + self._vector_index + ",'" + qv_comma
    q += "','" + ",".join(options)
    q += "') from " + self._pod + "." + self._store
    if where is not None:
        q += " where " + self._sanitize_input(where)

    jarr = self.run(q)
    # run() returns {} (not None) on failure; only a list carries rows.
    if not isinstance(jarr, list):
        return ([], [], []) if as_nodes else []

    nodes = []
    ids = []
    simscores = []
    docs = []
    for row in jarr:
        zid = row["zid"]
        md = {"zid": zid}
        if metadata_fields is not None:
            for m in metadata_fields:
                md[m] = row[m]

        if as_nodes:
            nodes.append(TextNode(id_=zid, text=row["text"], metadata=md))
            ids.append(zid)
            simscores.append(float(row["score"]))
        else:
            docs.append(Document(id_=zid, text=row["text"], metadata=md))

    if as_nodes:
        return (nodes, ids, simscores)
    return docs

is_anomalous #

is_anomalous(node: BaseNode, **kwargs: Any) -> bool

检测给定文本是否是数据集中的异常值。

参数

名称 类型 描述 默认值
node BaseNode

带有嵌入向量的节点,用于检测其是否为数据集中的异常

必需

返回值:True 或 False

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
def is_anomalous(
    self,
    node: BaseNode,
    **kwargs: Any,
) -> bool:
    """
    Detect if given text is anomalous from the dataset.

    Args:
        node: node whose embedding is checked against the store
    Returns:
        True when the server answers "YES"; False otherwise, including when
        the reply is empty or is not a list

    """
    qv_comma = self._sanitize_input(",".join(str(f) for f in node.get_embedding()))
    q = "select anomalous(" + self._vector_index + ", '" + qv_comma
    q += "', 'type=" + self._vector_type + "')"
    q += " from " + self._pod + "." + self._store

    js = self.run(q)
    # run() returns {} on failure, which has no element 0 to decode.
    if not isinstance(js, list) or not js:
        return False
    jd = json.loads(js[0])
    return jd["anomalous"] == "YES"

run #

run(query: str, withFile: bool = False) -> dict

在 jaguardb 中运行任何查询语句。

参数

名称 类型 描述 默认值
query str

发送给 jaguardb 的查询语句

必需

返回值:token 为空(尚未登录)或响应无法解析为 JSON 时返回空字典 {},否则返回解析后的 JSON 结果

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
def run(self, query: str, withFile: bool = False) -> dict:
    """
    Send one query statement to jaguardb and decode the JSON reply.

    Args:
        query (str): query statement to jaguardb
        withFile (bool): passed through to the HTTP client's ``post``
    Returns:
        the decoded JSON reply, or ``{}`` when the session token is empty
        (an error is logged) or when the reply cannot be decoded as JSON

    """
    token = self._token
    if token == "":
        logger.error(f"E0005 error run({query})")
        return {}

    payload = self._jag.post(query, token, withFile).text
    try:
        parsed = json.loads(payload)
    except Exception:
        parsed = {}
    return parsed

count #

count() -> int

计算 jaguardb 中某个存储的记录数。

参数:无。返回值:(int) pod 存储中的记录数

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
459
460
461
462
463
464
465
466
467
468
469
470
471
472
def count(self) -> int:
    """
    Count records of a store in jaguardb.

    Args: no args
    Returns: (int) number of records in pod store; 0 when the reply is
        empty or is not a list
    """
    js = self.run("select count() from " + self._pod + "." + self._store)
    # run() returns {} on failure, which has no element 0 to decode.
    if not isinstance(js, list) or not js:
        return 0
    return int(json.loads(js[0])["data"])

clear #

clear() -> None

删除 jaguardb 中的所有记录。

参数:无。返回值:None

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
474
475
476
477
478
479
480
481
482
483
def clear(self) -> None:
    """
    Truncate the store, removing every record it holds.

    Args: No args
    Returns: None
    """
    self.run(f"truncate store {self._pod}.{self._store}")

drop #

drop() -> None

在 jaguardb 中删除或移除某个存储。

参数:无。返回值:None

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
485
486
487
488
489
490
491
492
493
494
def drop(self) -> None:
    """
    Issue a ``drop store`` statement for this pod's store.

    Args: no args
    Returns: None
    """
    self.run(f"drop store {self._pod}.{self._store}")

login #

login(jaguar_api_key: Optional[str] = '') -> bool

使用 jaguar_api_key 登录 jaguar 服务器,或让 self._jag 查找密钥。

参数

名称 类型 描述 默认值
jaguar_api_key Optional[str]

用户访问 jaguardb 服务器的 API 密钥;留空时由 self._jag 自行查找密钥

''

返回值:成功返回 True;失败返回 False

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
def login(
    self,
    jaguar_api_key: Optional[str] = "",
) -> bool:
    """
    Login to jaguar server with a jaguar_api_key or let self._jag find a key.

    Args:
        jaguar_api_key (str, optional): API key of user to jaguardb server;
            when empty or None the HTTP client's ``getApiKey()`` supplies one
    Returns:
        True if successful; False if not successful

    """
    # None is allowed by the signature, so treat it like "".
    if not jaguar_api_key:
        jaguar_api_key = self._jag.getApiKey()
    self._jaguar_api_key = jaguar_api_key
    # Normalize a falsy token to "" so the empty-token check stays reliable.
    self._token = self._jag.login(jaguar_api_key) or ""
    if self._token == "":
        logger.error("E0001 error init(): invalid jaguar_api_key")
        return False
    return True

logout #

logout() -> None

登出以清理资源。

参数:无。返回值:None

源码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-jaguar/llama_index/vector_stores/jaguar/base.py
523
524
525
526
527
528
529
530
def logout(self) -> None:
    """
    End the session by handing the current token to the client's logout.

    Args: no args
    Returns: None
    """
    session_token = self._token
    self._jag.logout(session_token)