Bagel 向量存储
如果您在 Colab 上打开此 Notebook,可能需要先安装 LlamaIndex 🦙。
输入 [ ]
已复制!
%pip install llama-index-vector-stores-bagel
%pip install llama-index
%pip install bagelML
%pip install llama-index-vector-stores-bagel %pip install llama-index %pip install bagelML
输入 [ ]
已复制!
import logging
import sys

# Configure root logging at INFO level with stdout as the target stream, so
# log records appear in the notebook output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# Additionally attach an explicit stdout handler to the root logger.
# NOTE(review): when basicConfig has already installed its own stdout handler,
# every record is emitted twice -- confirm this duplication is intended.
root_logger = logging.getLogger()
stdout_handler = logging.StreamHandler(stream=sys.stdout)
root_logger.addHandler(stdout_handler)
import logging import sys logging.basicConfig(stream=sys.stdout, level=logging.INFO) logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
输入 [ ]
已复制!
# Configure OpenAI credentials for this session.
import getpass
import os

# Prompt for the key without echoing it, then publish it through the
# OPENAI_API_KEY environment variable.
entered_openai_key = getpass.getpass("OpenAI API Key:")
os.environ["OPENAI_API_KEY"] = entered_openai_key

import openai

# Hand the stored key to the openai module as well.
openai.api_key = os.environ["OPENAI_API_KEY"]
# 设置 OpenAI import os import getpass os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API 密钥:") import openai openai.api_key = os.environ["OPENAI_API_KEY"]
输入 [ ]
已复制!
# `getpass` is imported here (not only in the earlier OpenAI cell) so that this
# cell runs on its own instead of failing with NameError when that cell was
# skipped.
import getpass
import os

# Prompt for the Bagel API key without echoing it and expose it through the
# BAGEL_API_KEY environment variable.
os.environ["BAGEL_API_KEY"] = getpass.getpass("Bagel API Key:")
import os # 设置环境变量 os.environ["BAGEL_API_KEY"] = getpass.getpass("Bagel API 密钥:")
输入 [ ]
已复制!
import bagel
from bagel import Settings
import bagel from bagel import Settings
输入 [ ]
已复制!
# Connection settings: use the REST implementation against api.bageldb.ai.
server_settings = Settings(
    bagel_api_impl="rest",
    bagel_server_host="api.bageldb.ai",
)
client = bagel.Client(server_settings)

# Options for the cluster that will hold the embeddings.
# NOTE(review): dimension=1536 presumably matches the embedding model used
# later when indexing -- confirm.
cluster_options = {"embedding_model": "custom", "dimension": 1536}

# Reuse the cluster named "testing_embeddings_3", or create it if needed
# (as the method name indicates).
collection = client.get_or_create_cluster(
    "testing_embeddings_3", **cluster_options
)
server_settings = Settings( bagel_api_impl="rest", bagel_server_host="api.bageldb.ai" ) client = bagel.Client(server_settings) collection = client.get_or_create_cluster( "testing_embeddings_3", embedding_model="custom", dimension=1536 )
输入 [ ]
已复制!
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.bagel import BagelVectorStore
from llama_index.core import VectorStoreIndex, StorageContext from llama_index.vector_stores.bagel import BagelVectorStore
输入 [ ]
已复制!
from llama_index.core.schema import TextNode

# Sample data: one (biography text, category, country) record per celebrity.
celebrity_records = [
    (
        "Michael Jordan is a retired professional basketball player,"
        " widely regarded as one of the greatest basketball players of all"
        " time.",
        "Sports",
        "United States",
    ),
    (
        "Angelina Jolie is an American actress, filmmaker, and"
        " humanitarian. She has received numerous awards for her acting"
        " and is known for her philanthropic work.",
        "Entertainment",
        "United States",
    ),
    (
        "Elon Musk is a business magnate, industrial designer, and"
        " engineer. He is the founder, CEO, and lead designer of SpaceX,"
        " Tesla, Inc., Neuralink, and The Boring Company.",
        "Business",
        "United States",
    ),
    (
        "Rihanna is a Barbadian singer, actress, and businesswoman. She"
        " has achieved significant success in the music industry and is"
        " known for her versatile musical style.",
        "Music",
        "Barbados",
    ),
    (
        "Cristiano Ronaldo is a Portuguese professional footballer who is"
        " considered one of the greatest football players of all time. He"
        " has won numerous awards and set multiple records during his"
        " career.",
        "Sports",
        "Portugal",
    ),
]

# Turn each record into a TextNode whose metadata carries the "category" and
# "country" fields.
nodes = [
    TextNode(
        text=biography,
        metadata={"category": category, "country": country},
    )
    for biography, category, country in celebrity_records
]
from llama_index.core.schema import TextNode nodes = [ TextNode( text=( "Michael Jordan is a retired professional basketball player," " widely regarded as one of the greatest basketball players of all" " time." ), metadata={ "category": "体育", "country": "美国", }, ), TextNode( text=( "Angelina Jolie is an American actress, filmmaker, and" " humanitarian. She has received numerous awards for her acting" " and is known for her philanthropic work." ), metadata={ "category": "娱乐", "country": "美国", }, ), TextNode( text=( "Elon Musk is a business magnate, industrial designer, and" " engineer. He is the founder, CEO, and lead designer of SpaceX," " Tesla, Inc., Neuralink, and The Boring Company." ), metadata={ "category": "商业", "country": "美国", }, ), TextNode( text=( "Rihanna is a Barbadian singer, actress, and businesswoman. She" " has achieved significant success in the music industry and is" " known for her versatile musical style." ), metadata={ "category": "音乐", "country": "巴巴多斯", }, ), TextNode( text=( "Cristiano Ronaldo is a Portuguese professional footballer who is" " considered one of the greatest football players of all time. He" " has won numerous awards and set multiple records during his" " career." ), metadata={ "category": "体育", "country": "葡萄牙", }, ), ]
输入 [ ]
已复制!
# Wrap the Bagel cluster obtained earlier in a BagelVectorStore, then build a
# StorageContext that uses it as its vector store.
vector_store = BagelVectorStore(collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
vector_store = BagelVectorStore(collection=collection) storage_context = StorageContext.from_defaults(vector_store=vector_store)
输入 [ ]
已复制!
# Build a vector index from the sample nodes, using the Bagel-backed storage
# context.
# NOTE(review): presumably this embeds the nodes and writes them to the Bagel
# cluster -- confirm.
index = VectorStoreIndex(nodes, storage_context=storage_context)
index = VectorStoreIndex(nodes, storage_context=storage_context)
输入 [ ]
已复制!
from llama_index.core.retrievers import VectorIndexAutoRetriever
from llama_index.core.vector_stores import MetadataInfo, VectorStoreInfo

# Describe each metadata field stored on the nodes: its name, type and the
# values it may take.
category_field = MetadataInfo(
    name="category",
    type="str",
    description=(
        "Category of the celebrity, one of [Sports, Entertainment,"
        " Business, Music]"
    ),
)
country_field = MetadataInfo(
    name="country",
    type="str",
    description=(
        "Country of the celebrity, one of [United States, Barbados,"
        " Portugal]"
    ),
)

# Overall description of the store contents plus its metadata fields.
vector_store_info = VectorStoreInfo(
    content_info="brief biography of celebrities",
    metadata_info=[category_field, country_field],
)

# Auto-retriever over the index, given the store description above.
retriever = VectorIndexAutoRetriever(
    index,
    vector_store_info=vector_store_info,
)
from llama_index.core.retrievers import VectorIndexAutoRetriever from llama_index.core.vector_stores import MetadataInfo, VectorStoreInfo vector_store_info = VectorStoreInfo( content_info="名人的简短传记", metadata_info=[ MetadataInfo( name="category", type="str", description=( "名人所属类别,可选值:[体育, 娱乐," " 商业, 音乐]" ), ), MetadataInfo( name="country", type="str", description=( "名人所在国家,可选值:[美国, 巴巴多斯," " 葡萄牙]" ), ), ], ) retriever = VectorIndexAutoRetriever( index, vector_store_info=vector_store_info )
输入 [ ]
已复制!
# Run the auto-retriever with the free-text query "celebrity"; as the last
# expression of the cell, its result is shown as the cell output.
retriever.retrieve("celebrity")
retriever.retrieve("celebrity")