MyScale Vector Store
In this notebook we show a quick demo of using MyScaleVectorStore.
If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.
In [ ]
%pip install llama-index-vector-stores-myscale
In [ ]
!pip install llama-index
Creating a MyScale Client
In [ ]
import logging
import sys
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
In [ ]
from os import environ
import clickhouse_connect
environ["OPENAI_API_KEY"] = "sk-*"
# initialize client
client = clickhouse_connect.get_client(
host="YOUR_CLUSTER_HOST",
port=8443,
username="YOUR_USERNAME",
password="YOUR_CLUSTER_PASSWORD",
)
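If you want to verify the connection before building anything, a trivial query through the client is enough. This is a minimal sketch using clickhouse_connect's command helper; the host and credentials above are placeholders that you must replace with your own cluster values.

In [ ]
# optional sanity check: should print 1 if the host and credentials are valid
print(client.command("SELECT 1"))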
Load documents, build and store the VectorStoreIndex with MyScaleVectorStore
Here we use a set of Paul Graham essays, convert the text into embeddings, store them in MyScaleVectorStore, and query the store to find context for our LLM question-answering loop.
In [ ]
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.myscale import MyScaleVectorStore
from IPython.display import Markdown, display
In [ ]
# load documents
documents = SimpleDirectoryReader("../data/paul_graham").load_data()
print("Document ID:", documents[0].doc_id)
print("Number of Documents: ", len(documents))
Document ID: a5f2737c-ed18-4e5d-ab9a-75955edb816d
Number of Documents:  1
Download Data
In [ ]
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
You can process your files individually using SimpleDirectoryReader:
In [ ]
loader = SimpleDirectoryReader("./data/paul_graham/")
documents = loader.load_data()
for file in loader.input_files:
print(file)
# Here is where you would do any preprocessing
loader = SimpleDirectoryReader("./data/paul_graham/") documents = loader.load_data() for file in loader.input_files: print(file) # Here is where you would do any preprocessing
../data/paul_graham/paul_graham_essay.txt
In [ ]
# initialize with metadata filter and store indexes
from llama_index.core import StorageContext
for document in documents:
document.metadata = {"user_id": "123", "favorite_color": "blue"}
vector_store = MyScaleVectorStore(myscale_client=client)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents, storage_context=storage_context
)
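Once the index is built, more documents can be appended without rebuilding it from scratch. The sketch below uses a hypothetical extra document; VectorStoreIndex.insert chunks and embeds it and writes it to the same MyScale table as the documents indexed above.

In [ ]
from llama_index.core import Document

# hypothetical extra document; insert() chunks, embeds, and stores it
# alongside the documents already in the index
new_document = Document(
    text="An additional note about the essays.",
    metadata={"user_id": "123", "favorite_color": "blue"},
)
index.insert(new_document)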
In [ ]
import textwrap
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine(
filters=MetadataFilters(
filters=[
ExactMatchFilter(key="user_id", value="123"),
]
),
similarity_top_k=2,
vector_store_query_mode="hybrid",
)
response = query_engine.query("What did the author learn?")
print(textwrap.fill(str(response), 100))
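The Markdown and display imports above can be used to render the response nicely inside the notebook instead of printing plain text. A minimal sketch:

In [ ]
# render the query response as formatted Markdown in the notebook
display(Markdown(f"<b>{response}</b>"))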
Clear All Indexes
In [ ]
for document in documents:
index.delete_ref_doc(document.doc_id)
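Deleting by doc_id removes the inserted rows but leaves the underlying table in place. If you also want to drop the table itself, you can do so through the ClickHouse client. The sketch below assumes the vector store was created with its default database and table names; check your cluster for the actual table name before running it.

In [ ]
# assumption: MyScaleVectorStore used its default table (default.llama_index);
# adjust the name if you passed a custom table or database
client.command("DROP TABLE IF EXISTS default.llama_index")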