MongoDB Atlas 向量存储¶
如果您在 Colab 上打开此 Notebook,您可能需要安装 LlamaIndex 🦙。
In [ ]
已复制!
%pip install llama-index-vector-stores-mongodb
%pip install llama-index-vector-stores-mongodb
In [ ]
已复制!
!pip install llama-index
!pip install llama-index
In [ ]
已复制!
# Provide URI to constructor, or use environment variable
import pymongo
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.core import SimpleDirectoryReader
# Provide URI to constructor, or use environment variable import pymongo from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch from llama_index.core import VectorStoreIndex from llama_index.core import StorageContext from llama_index.core import SimpleDirectoryReader
下载数据
In [ ]
已复制!
!mkdir -p 'data/10k/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
!mkdir -p 'data/10k/' !wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/10k/uber_2021.pdf' -O 'data/10k/uber_2021.pdf'
In [ ]
已复制!
# Connection string for the cluster. Replace the <...> placeholders, or read the
# URI from the environment instead: mongo_uri = os.environ["MONGO_URI"]
mongo_uri = "mongodb+srv://<username>:<password>@<host>?retryWrites=true&w=majority"
mongodb_client = pymongo.MongoClient(mongo_uri)

# Wrap the client in the LlamaIndex vector store and create its vector search
# index (1536 dimensions, cosine similarity, path "embedding").
store = MongoDBAtlasVectorSearch(mongodb_client)
store.create_vector_search_index(
    dimensions=1536,
    path="embedding",
    similarity="cosine",
)

# Load the downloaded PDF and build the index on top of the MongoDB-backed store.
storage_context = StorageContext.from_defaults(vector_store=store)
uber_docs = SimpleDirectoryReader(
    input_files=["./data/10k/uber_2021.pdf"],
).load_data()
index = VectorStoreIndex.from_documents(uber_docs, storage_context=storage_context)
# mongo_uri = os.environ["MONGO_URI"] mongo_uri = ( "mongodb+srv://:@?retryWrites=true&w=majority" ) mongodb_client = pymongo.MongoClient(mongo_uri) store = MongoDBAtlasVectorSearch(mongodb_client) store.create_vector_search_index( dimensions=1536, path="embedding", similarity="cosine" ) storage_context = StorageContext.from_defaults(vector_store=store) uber_docs = SimpleDirectoryReader( input_files=["./data/10k/uber_2021.pdf"] ).load_data() index = VectorStoreIndex.from_documents( uber_docs, storage_context=storage_context )
In [ ]
已复制!
# NOTE(review): `Markdown` and `display` are not imported in the visible cells;
# presumably they come from IPython.display -- confirm before running.
query_engine = index.as_query_engine()
response = query_engine.query("What was Uber's revenue?")
display(Markdown(f"<b>{response}</b>"))
response = index.as_query_engine().query("What was Uber's revenue?") display(Markdown(f"{response}"))
Uber 在 2021 年的收入为 174.55 亿美元。
In [ ]
已复制!
from llama_index.core import Response

# Initial size: number of documents currently in the backing collection.
print(store._collection.count_documents({}))

# Get a ref_doc_id from the first source node of the previous query's response.
# A response that is not already a `Response` is converted via get_response().
typed_response = (
    response if isinstance(response, Response) else response.get_response()
)
ref_doc_id = typed_response.source_nodes[0].node.ref_doc_id
print(store._collection.count_documents({"metadata.ref_doc_id": ref_doc_id}))

# Test store delete: remove that document's entries, then report the new size.
if ref_doc_id:
    store.delete(ref_doc_id)
    print(store._collection.count_documents({}))
from llama_index.core import Response # Initial size print(store._collection.count_documents({})) # Get a ref_doc_id typed_response = ( response if isinstance(response, Response) else response.get_response() ) ref_doc_id = typed_response.source_nodes[0].node.ref_doc_id print(store._collection.count_documents({"metadata.ref_doc_id": ref_doc_id})) # Test store delete if ref_doc_id: store.delete(ref_doc_id) print(store._collection.count_documents({}))
4454
1
4453
注意:使用 MongoDB Atlas 时,您还需要另外创建一个 Atlas Search 索引(Atlas Search Index)。