GitHub 仓库阅读器
如果你在 Colab 上打开此 Notebook,你可能需要安装 LlamaIndex 🦙。
在 [ ]
已复制!
%pip install llama-index-readers-github
%pip install llama-index-readers-github
在 [ ]
已复制!
!pip install llama-index
!pip install llama-index
在 [ ]
已复制!
# nest_asyncio is applied because the reader used below runs its coroutines
# through an asyncio event loop's run_until_complete (presumably inside
# GithubRepositoryReader -- confirm against the reader's source). Since the
# Jupyter kernel itself already runs on an event loop, nested loops need
# this patch.
import nest_asyncio
nest_asyncio.apply()
# 这是因为我们在 GithubRepositoryReader 中使用了 asyncio 事件循环的 run_until_complete。
# 由于 Jupyter 内核本身运行在一个事件循环上,我们需要借助 nest_asyncio 来支持嵌套。
import nest_asyncio
nest_asyncio.apply()
在 [ ]
已复制!
%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
from llama_index.core import VectorStoreIndex
from llama_index.readers.github import GithubRepositoryReader, GithubClient
from IPython.display import Markdown, display
import os
%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx from llama_index.core import VectorStoreIndex from llama_index.readers.github import GithubRepositoryReader, GithubClient from IPython.display import Markdown, display import os
env: OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
在 [ ]
已复制!
%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
github_token = os.environ.get("GITHUB_TOKEN")
owner = "jerryjliu"
repo = "llama_index"
branch = "main"
github_client = GithubClient(github_token=github_token, verbose=True)
documents = GithubRepositoryReader(
github_client=github_client,
owner=owner,
repo=repo,
use_parser=False,
verbose=False,
filter_directories=(
["docs"],
GithubRepositoryReader.FilterType.INCLUDE,
),
filter_file_extensions=(
[
".png",
".jpg",
".jpeg",
".gif",
".svg",
".ico",
"json",
".ipynb",
],
GithubRepositoryReader.FilterType.EXCLUDE,
),
).load_data(branch=branch)
%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx github_token = os.environ.get("GITHUB_TOKEN") owner = "jerryjliu" repo = "llama_index" branch = "main" github_client = GithubClient(github_token=github_token, verbose=True) documents = GithubRepositoryReader( github_client=github_client, owner=owner, repo=repo, use_parser=False, verbose=False, filter_directories=( ["docs"], GithubRepositoryReader.FilterType.INCLUDE, ), filter_file_extensions=( [ ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", "json", ".ipynb", ], GithubRepositoryReader.FilterType.EXCLUDE, ), ).load_data(branch=branch)
在 [ ]
已复制!
# Build a vector store index over the documents loaded from the repository.
index = VectorStoreIndex.from_documents(documents)
index = VectorStoreIndex.from_documents(documents)
在 [ ]
已复制!
query_engine = index.as_query_engine()
# Only the query text is passed: the legacy `verbose=True` keyword is not a
# parameter of the query engine's query() method, so it is omitted here.
response = query_engine.query(
    "What is the difference between VectorStoreIndex and SummaryIndex?"
)
query_engine = index.as_query_engine()
# query() is called with the query text only; the legacy `verbose=True`
# keyword is not one of its parameters.
response = query_engine.query(
    "What is the difference between VectorStoreIndex and SummaryIndex?"
)
在 [ ]
已复制!
# Render the response as bold Markdown in the notebook output.
display(Markdown(f"<b>{response}</b>"))
display(Markdown(f"<b>{response}</b>"))