如果您正在 Colab 上打开此 Notebook,您可能需要安装 LlamaIndex 🦙。
输入 [ ]
已复制!
%pip install llama-index-vector-stores-supabase
%pip install llama-index-vector-stores-supabase
输入 [ ]
已复制!
!pip install llama-index
!pip install llama-index
输入 [ ]
已复制!
import logging
import sys
# Uncomment to see debug logs
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index.core import SimpleDirectoryReader, Document, StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.supabase import SupabaseVectorStore
import textwrap
import logging import sys # 取消注释以查看调试日志 # logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout)) from llama_index.core import SimpleDirectoryReader, Document, StorageContext from llama_index.core import VectorStoreIndex from llama_index.vector_stores.supabase import SupabaseVectorStore import textwrap
设置 OpenAI
第一步是配置 OpenAI 密钥。它将用于为加载到索引中的文档创建嵌入。
输入 [ ]
已复制!
import os

# Publish the OpenAI API key through the process environment.
# Replace the bracketed placeholder with your own key before running.
os.environ.update({"OPENAI_API_KEY": "[your_openai_api_key]"})
import os os.environ["OPENAI_API_KEY"] = "[your_openai_api_key]"
下载数据
输入 [ ]
已复制!
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
!mkdir -p 'data/paul_graham/' !wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
加载文档
使用 SimpleDirectoryReader 加载存储在 ./data/paul_graham/ 中的文档。
输入 [ ]
已复制!
# Load everything found under ./data/paul_graham/ into `documents`.
reader = SimpleDirectoryReader("./data/paul_graham/")
documents = reader.load_data()

# Print the first document's ID and hash as a quick check that loading worked.
first_document = documents[0]
print(
    "Document ID:",
    first_document.doc_id,
    "Document Hash:",
    first_document.doc_hash,
)
documents = SimpleDirectoryReader("./data/paul_graham/").load_data() print( "Document ID:", documents[0].doc_id, "Document Hash:", documents[0].doc_hash, )
Document ID: fb056993-ee9e-4463-80b4-32cf9509d1d8 Document Hash: 77ae91ab542f3abb308c4d7c77c9bc4c9ad0ccd63144802b7cbe7e1bb3a4094e
创建由 Supabase 向量存储支持的索引。
这适用于所有支持 pgvector 的 Postgres 提供商。如果集合不存在,我们将尝试创建一个新集合。
注意:如果您不使用 OpenAI 的 text-embedding-ada-002,则需要传入嵌入维度,例如 `vector_store = SupabaseVectorStore(..., dimension=...)`。
输入 [ ]
已复制!
# Postgres connection string; fill in every <placeholder> with your own values.
postgres_url = "postgresql://<user>:<password>@<host>:<port>/<db_name>"

# Vector store backed by the "base_demo" collection in that database.
vector_store = SupabaseVectorStore(
    postgres_connection_string=postgres_url,
    collection_name="base_demo",
)

# Route index storage through the Supabase vector store, then build the
# index from the previously loaded documents.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)
vector_store = SupabaseVectorStore( postgres_connection_string=( "postgresql://<user>:<password>@<host>:<port>/<db_name>" ), collection_name="base_demo", ) storage_context = StorageContext.from_defaults(vector_store=vector_store) index = VectorStoreIndex.from_documents( documents, storage_context=storage_context )
查询索引
现在我们可以使用索引提问了。
输入 [ ]
已复制!
# Create a query engine on top of the index and ask it the first question.
author_question = "Who is the author?"
query_engine = index.as_query_engine()
response = query_engine.query(author_question)
query_engine = index.as_query_engine() response = query_engine.query("Who is the author?")
/Users/suo/miniconda3/envs/llama/lib/python3.9/site-packages/vecs/collection.py:182: UserWarning: Query does not have a covering index for cosine_distance. See Collection.create_index warnings.warn(
输入 [ ]
已复制!
# Wrap the response text at 100 columns before printing it.
wrapped_response = textwrap.fill(str(response), width=100)
print(wrapped_response)
print(textwrap.fill(str(response), 100))
The author of this text is Paul Graham.
输入 [ ]
已复制!
# Reuse the existing query engine for a second question.
growing_up_question = "What did the author do growing up?"
response = query_engine.query(growing_up_question)
response = query_engine.query("What did the author do growing up?")
输入 [ ]
已复制!
# Wrap the latest response at 100 columns before printing it.
wrapped_response = textwrap.fill(str(response), width=100)
print(wrapped_response)
print(textwrap.fill(str(response), 100))
The author grew up writing essays, learning Italian, exploring Florence, painting people, working with computers, attending RISD, living in a rent-stabilized apartment, building an online store builder, editing Lisp expressions, publishing essays online, writing essays, painting still life, working on spam filters, cooking for groups, and buying a building in Cambridge.
使用元数据过滤器
输入 [ ]
已复制!
from llama_index.core.schema import TextNode

# Three sample nodes with different metadata, used to demonstrate metadata
# filtering. Only the first two carry a "theme" key.
nodes = [
    TextNode(
        text="The Shawshank Redemption",
        metadata={
            "author": "Stephen King",
            "theme": "Friendship",
        },
    ),
    TextNode(
        text="The Godfather",
        metadata={
            "director": "Francis Ford Coppola",
            "theme": "Mafia",
        },
    ),
    TextNode(
        text="Inception",
        metadata={
            "director": "Christopher Nolan",
        },
    ),
]
from llama_index.core.schema import TextNode nodes = [ TextNode( **{ "text": "The Shawshank Redemption", "metadata": { "author": "Stephen King", "theme": "Friendship", }, } ), TextNode( **{ "text": "The Godfather", "metadata": { "director": "Francis Ford Coppola", "theme": "Mafia", }, } ), TextNode( **{ "text": "Inception", "metadata": { "director": "Christopher Nolan", }, } ), ]
输入 [ ]
已复制!
# Postgres connection string; fill in every <placeholder> with your own values.
postgres_url = "postgresql://<user>:<password>@<host>:<port>/<db_name>"

# A separate collection holds the nodes for the metadata-filter example.
vector_store = SupabaseVectorStore(
    postgres_connection_string=postgres_url,
    collection_name="metadata_filters_demo",
)

# Build the index directly from the prepared nodes, stored in Supabase.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(
    nodes,
    storage_context=storage_context,
)
vector_store = SupabaseVectorStore( postgres_connection_string=( "postgresql://<user>:<password>@<host>:<port>/<db_name>" ), collection_name="metadata_filters_demo", ) storage_context = StorageContext.from_defaults(vector_store=vector_store) index = VectorStoreIndex(nodes, storage_context=storage_context)
定义元数据过滤器
输入 [ ]
已复制!
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters

# A single exact-match condition: the "theme" metadata key must equal "Mafia".
theme_is_mafia = ExactMatchFilter(key="theme", value="Mafia")
filters = MetadataFilters(filters=[theme_is_mafia])
from llama_index.core.vector_stores import ExactMatchFilter, MetadataFilters filters = MetadataFilters( filters=[ExactMatchFilter(key="theme", value="Mafia")] )
使用过滤器从向量存储中检索
输入 [ ]
已复制!
# Build a retriever that applies the metadata filters defined earlier.
retriever = index.as_retriever(filters=filters)

# Left as a bare expression so the notebook displays the retrieved nodes.
inception_question = "What is inception about?"
retriever.retrieve(inception_question)
retriever = index.as_retriever(filters=filters) retriever.retrieve("What is inception about?")
输出 [ ]
[NodeWithScore(node=Node(text='The Godfather', doc_id='f837ed85-aacb-4552-b88a-7c114a5be15d', embedding=None, doc_hash='f8ee912e238a39fe2e620fb232fa27ade1e7f7c819b6d5b9cb26f3dddc75b6c0', extra_info={'theme': 'Mafia', 'director': 'Francis Ford Coppola'}, node_info={'_node_type': '1'}, relationships={}), score=0.20671339734643313)]