Dynamodb

DynamoDBVectorStore #

基类: BasePydanticVectorStore

DynamoDB 向量存储。

在此向量存储中，嵌入存储在 DynamoDB 表中。此类的实现参考了 SimpleVectorStore。

参数

名称	类型	描述	默认值
`dynamodb_kvstore`	`DynamoDBKVStore`	数据存储	必需
`namespace`	`Optional[str]`	命名空间	`None`

示例

pip install llama-index-vector-stores-dynamodb

from llama_index.vector_stores.dynamodb import DynamoDBVectorStore

# Create the vector store from the name of a DynamoDB table.
vector_store = DynamoDBVectorStore.from_table_name(table_name="my_table")

源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py

class DynamoDBVectorStore(BasePydanticVectorStore):
    """
    DynamoDB Vector Store.

    In this vector store, embeddings are stored within a DynamoDB table.
    This class was implemented with reference to SimpleVectorStore.

    Two collections are kept in the underlying key-value store, both
    prefixed with the namespace: ``<namespace>/embedding`` maps a node id to
    its embedding, and ``<namespace>/text_id_to_doc_id`` maps a node id to
    the ``ref_doc_id`` of the node. Every stored item is a single-entry
    dict keyed by ``"value"``.

    Args:
        dynamodb_kvstore (DynamoDBKVStore): data store
        namespace (Optional[str]): namespace; ``DEFAULT_NAMESPACE`` is used
            when it is ``None`` or empty.

    Examples:
        `pip install llama-index-vector-stores-dynamodb`

        ```python
        from llama_index.vector_stores.dynamodb import DynamoDBVectorStore

        vector_store = DynamoDBVectorStore.from_table_name(table_name="my_table")
        ```

    """

    stores_text: bool = False

    _kvstore: DynamoDBKVStore = PrivateAttr()
    _collection_embedding: str = PrivateAttr()
    _collection_text_id_to_doc_id: str = PrivateAttr()
    _key_value: str = PrivateAttr()

    def __init__(
        self, dynamodb_kvstore: DynamoDBKVStore, namespace: str | None = None
    ) -> None:
        """
        Initialize params.

        Args:
            dynamodb_kvstore (DynamoDBKVStore): key-value store used for all
                reads and writes.
            namespace (Optional[str]): prefix for the collection names;
                falls back to ``DEFAULT_NAMESPACE`` when falsy.

        """
        super().__init__()

        self._kvstore = dynamodb_kvstore
        namespace = namespace or DEFAULT_NAMESPACE
        self._collection_embedding = f"{namespace}/embedding"
        self._collection_text_id_to_doc_id = f"{namespace}/text_id_to_doc_id"
        # Name of the single entry inside every stored item.
        self._key_value = "value"

    @classmethod
    def from_table_name(
        cls, table_name: str, namespace: str | None = None
    ) -> DynamoDBVectorStore:
        """
        Load from DynamoDB table name.

        Args:
            table_name (str): passed to ``DynamoDBKVStore.from_table_name``.
            namespace (Optional[str]): forwarded to the constructor.

        Returns:
            DynamoDBVectorStore: a store backed by the created kvstore.

        """
        dynamodb_kvstore = DynamoDBKVStore.from_table_name(table_name=table_name)
        return cls(dynamodb_kvstore=dynamodb_kvstore, namespace=namespace)

    @classmethod
    def class_name(cls) -> str:
        """Return the class name, ``"DynamoDBVectorStore"``."""
        return "DynamoDBVectorStore"

    @property
    def client(self) -> None:
        """Get client. This store exposes none, so the value is always None."""
        return

    def get(self, text_id: str) -> List[float]:
        """
        Get embedding.

        Args:
            text_id (str): key of the item in the embedding collection.

        Returns:
            List[float]: the ``"value"`` entry of the stored item.

        """
        item = self._kvstore.get(key=text_id, collection=self._collection_embedding)
        item = cast(Dict[str, List[float]], item)
        return item[self._key_value]

    def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
        """
        Add nodes to index.

        For each node, the embedding and the ``ref_doc_id`` are written to
        their respective collections under the node id.

        Args:
            nodes (List[BaseNode]): nodes to store.
            **add_kwargs (Any): accepted for interface compatibility; unused.

        Returns:
            List[str]: ids of the stored nodes, in input order.

        """
        response = []
        for node in nodes:
            self._kvstore.put(
                key=node.node_id,
                val={self._key_value: node.get_embedding()},
                collection=self._collection_embedding,
            )
            self._kvstore.put(
                key=node.node_id,
                val={self._key_value: node.ref_doc_id},
                collection=self._collection_text_id_to_doc_id,
            )
            response.append(node.node_id)
        return response

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete nodes using ref_doc_id.

        The whole text-id-to-doc-id collection is read to find the matching
        node ids; each match is then removed from both collections.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.
            **delete_kwargs (Any): accepted for interface compatibility;
                unused.

        """
        text_ids_to_delete = set()
        for text_id, item in self._kvstore.get_all(
            collection=self._collection_text_id_to_doc_id
        ).items():
            if ref_doc_id == item[self._key_value]:
                text_ids_to_delete.add(text_id)

        for text_id in text_ids_to_delete:
            self._kvstore.delete(key=text_id, collection=self._collection_embedding)
            self._kvstore.delete(
                key=text_id, collection=self._collection_text_id_to_doc_id
            )

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """
        Get nodes for response.

        All stored embeddings are loaded, optionally restricted to
        ``query.node_ids``, and ranked by ``get_top_k_embeddings_learner``
        (modes in ``LEARNER_MODES``) or ``get_top_k_embeddings`` (default
        mode).

        Args:
            query (VectorStoreQuery): query embedding, mode, top-k and
                optional node id restriction.
            **kwargs (Any): accepted for interface compatibility; unused.

        Returns:
            VectorStoreQueryResult: top similarities and their node ids.

        Raises:
            ValueError: if ``query.filters`` is set, or if ``query.mode`` is
                neither a learner mode nor the default mode.

        """
        if query.filters is not None:
            raise ValueError(
                "Metadata filters not implemented for DynamoDBVectorStore yet."
            )

        # TODO: consolidate with get_query_text_embedding_similarities
        items = self._kvstore.get_all(collection=self._collection_embedding).items()

        # An empty or missing node_ids list means "consider every node".
        available_ids = set(query.node_ids) if query.node_ids else None

        # Collect ids and embeddings together so the two lists stay aligned.
        node_ids = []
        embeddings = []
        for node_id, item in items:
            if available_ids is None or node_id in available_ids:
                node_ids.append(node_id)
                embeddings.append(item[self._key_value])

        query_embedding = cast(List[float], query.query_embedding)
        if query.mode in LEARNER_MODES:
            top_similarities, top_ids = get_top_k_embeddings_learner(
                query_embedding=query_embedding,
                embeddings=embeddings,
                similarity_top_k=query.similarity_top_k,
                embedding_ids=node_ids,
            )
        elif query.mode == VectorStoreQueryMode.DEFAULT:
            top_similarities, top_ids = get_top_k_embeddings(
                query_embedding=query_embedding,
                embeddings=embeddings,
                similarity_top_k=query.similarity_top_k,
                embedding_ids=node_ids,
            )
        else:
            raise ValueError(f"Invalid query mode: {query.mode}")

        return VectorStoreQueryResult(similarities=top_similarities, ids=top_ids)

client `property` #

client: None

获取客户端。

from_table_name `classmethod` #

from_table_name(table_name: str, namespace: str | None = None) -> DynamoDBVectorStore

从 DynamoDB 表名加载。

源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py

@classmethod
def from_table_name(
    cls, table_name: str, namespace: str | None = None
) -> DynamoDBVectorStore:
    """
    Build a vector store from the name of a DynamoDB table.

    Args:
        table_name (str): passed to ``DynamoDBKVStore.from_table_name``.
        namespace (Optional[str]): forwarded unchanged to the constructor.

    Returns:
        DynamoDBVectorStore: an instance of ``cls`` backed by the new kvstore.

    """
    return cls(
        dynamodb_kvstore=DynamoDBKVStore.from_table_name(table_name=table_name),
        namespace=namespace,
    )

get #

get(text_id: str) -> List[float]

获取嵌入。

源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py

def get(self, text_id: str) -> List[float]:
    """
    Return the embedding stored under ``text_id``.

    Args:
        text_id (str): key looked up in the embedding collection.

    Returns:
        List[float]: the entry of the stored item named by ``self._key_value``.

    """
    record = cast(
        Dict[str, List[float]],
        self._kvstore.get(key=text_id, collection=self._collection_embedding),
    )
    return record[self._key_value]

add #

add(nodes: List[BaseNode], **add_kwargs: Any) -> List[str]

添加节点到索引。

源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py

def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]:
    """
    Store the given nodes and return their ids.

    Each node produces two writes under its node id: the embedding goes to
    the embedding collection, and the ``ref_doc_id`` goes to the
    text-id-to-doc-id collection.

    Args:
        nodes (List[BaseNode]): nodes to store.
        **add_kwargs (Any): accepted for interface compatibility; unused.

    Returns:
        List[str]: node ids in the order the nodes were given.

    """
    stored_ids = []
    for node in nodes:
        embedding_item = {self._key_value: node.get_embedding()}
        self._kvstore.put(
            key=node.node_id,
            val=embedding_item,
            collection=self._collection_embedding,
        )

        doc_ref_item = {self._key_value: node.ref_doc_id}
        self._kvstore.put(
            key=node.node_id,
            val=doc_ref_item,
            collection=self._collection_text_id_to_doc_id,
        )

        stored_ids.append(node.node_id)
    return stored_ids

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

使用 ref_doc_id 删除节点。

参数

名称	类型	描述	默认值
`ref_doc_id`	`str`	要删除文档的 doc_id。	必需

源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py

def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """
    Delete every node whose stored doc id equals ``ref_doc_id``.

    The full text-id-to-doc-id collection is read to find the matching
    text ids; each match is then removed from the embedding collection and
    from the text-id-to-doc-id collection.

    Args:
        ref_doc_id (str): The doc_id of the document to delete.
        **delete_kwargs (Any): accepted for interface compatibility; unused.

    """
    doc_id_items = self._kvstore.get_all(
        collection=self._collection_text_id_to_doc_id
    )
    matching_text_ids = {
        text_id
        for text_id, item in doc_id_items.items()
        if ref_doc_id == item[self._key_value]
    }

    for text_id in matching_text_ids:
        # Embedding first, then the doc-id mapping.
        for collection in (
            self._collection_embedding,
            self._collection_text_id_to_doc_id,
        ):
            self._kvstore.delete(key=text_id, collection=collection)

query #

query(query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult

获取响应节点。

源代码位于 llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb/llama_index/vector_stores/dynamodb/base.py

def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """
    Get nodes for response.

    All stored embeddings are loaded, optionally restricted to
    ``query.node_ids``, and ranked by ``get_top_k_embeddings_learner``
    (modes in ``LEARNER_MODES``) or ``get_top_k_embeddings`` (default mode).

    Args:
        query (VectorStoreQuery): query embedding, mode, top-k and optional
            node id restriction.
        **kwargs (Any): accepted for interface compatibility; unused.

    Returns:
        VectorStoreQueryResult: top similarities and their node ids.

    Raises:
        ValueError: if ``query.filters`` is set, or if ``query.mode`` is
            neither a learner mode nor the default mode.

    """
    if query.filters is not None:
        raise ValueError(
            "Metadata filters not implemented for DynamoDBVectorStore yet."
        )

    # TODO: consolidate with get_query_text_embedding_similarities
    items = self._kvstore.get_all(collection=self._collection_embedding).items()

    # An empty or missing node_ids list means "consider every node".
    available_ids = set(query.node_ids) if query.node_ids else None

    # Collect ids and embeddings together so the two lists stay aligned.
    node_ids = []
    embeddings = []
    for node_id, item in items:
        if available_ids is None or node_id in available_ids:
            node_ids.append(node_id)
            embeddings.append(item[self._key_value])

    query_embedding = cast(List[float], query.query_embedding)
    if query.mode in LEARNER_MODES:
        top_similarities, top_ids = get_top_k_embeddings_learner(
            query_embedding=query_embedding,
            embeddings=embeddings,
            similarity_top_k=query.similarity_top_k,
            embedding_ids=node_ids,
        )
    elif query.mode == VectorStoreQueryMode.DEFAULT:
        top_similarities, top_ids = get_top_k_embeddings(
            query_embedding=query_embedding,
            embeddings=embeddings,
            similarity_top_k=query.similarity_top_k,
            embedding_ids=node_ids,
        )
    else:
        raise ValueError(f"Invalid query mode: {query.mode}")

    return VectorStoreQueryResult(similarities=top_similarities, ids=top_ids)

Dynamodb

DynamoDBVectorStore #

client property #

from_table_name classmethod #

get #

add #

delete #

query #

client `property` #

from_table_name `classmethod` #