LLM Pydantic 程序 - NVIDIA¶

本指南展示了如何使用我们的 LLMTextCompletionProgram 生成结构化数据。给定一个 LLM 和一个输出 Pydantic 类，生成一个结构化的 Pydantic 对象。

关于目标对象，您可以选择直接指定 output_cls，或者指定一个 PydanticOutputParser 或任何其他生成 Pydantic 对象的 BaseOutputParser。

在下面的示例中，我们将向您展示将数据提取到 Album 对象（可以包含 Song 对象列表）的不同方法。

提取到 `Album` 类¶

这是一个将输出解析为 Album 模式的简单示例，该模式可以包含多首歌曲。

只需在初始化 LLMTextCompletionProgram 时将 Album 传给 output_cls 参数。

In [ ]

已复制！

%pip install llama-index-readers-file llama-index-embeddings-nvidia llama-index-llms-nvidia
%pip install llama-index-readers-file llama-index-embeddings-nvidia llama-index-llms-nvidia

In [ ]

已复制！





import getpass
import os

# del os.environ['NVIDIA_API_KEY']  ## delete key and reset
# Reuse a key that is already exported; otherwise prompt for one without
# echoing it to the screen and store it in the environment for this process.
if os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    print("Valid NVIDIA_API_KEY already in environment. Delete to reset")
else:
    # Pasted keys frequently carry stray surrounding whitespace.
    nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ").strip()
    # Explicit raise instead of `assert`: assertions are removed when Python
    # runs with -O, which would let a malformed key through silently.
    if not nvapi_key.startswith("nvapi-"):
        raise ValueError(f"{nvapi_key[:5]}... is not a valid key")
    os.environ["NVIDIA_API_KEY"] = nvapi_key
import getpass import os # del os.environ['NVIDIA_API_KEY'] ## delete key and reset if os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"): print("Valid NVIDIA_API_KEY already in environment. Delete to reset") else: nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ") assert nvapi_key.startswith( "nvapi-" ), f"{nvapi_key[:5]}... is not a valid key" os.environ["NVIDIA_API_KEY"] = nvapi_key

In [ ]

已复制！





from pydantic import BaseModel
from typing import List
from llama_index.core import Settings
from llama_index.llms.nvidia import NVIDIA
from llama_index.embeddings.nvidia import NVIDIAEmbedding
from llama_index.core.program import LLMTextCompletionProgram
from llama_index.core.program import FunctionCallingProgram
from pydantic import BaseModel from typing import List from llama_index.core import Settings from llama_index.llms.nvidia import NVIDIA from llama_index.embeddings.nvidia import NVIDIAEmbedding from llama_index.core.program import LLMTextCompletionProgram from llama_index.core.program import FunctionCallingProgram

In [ ]

已复制！

# Language model: NVIDIA connector with its default model, registered as the
# global default LLM.
llm = NVIDIA()
Settings.llm = llm

# Embedding model, registered as the global default embedder.
# NOTE(review): truncate="END" presumably trims over-long inputs at the end;
# confirm against the NVIDIAEmbedding documentation.
embedder = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END")
Settings.embed_model = embedder
llm = NVIDIA() embedder = NVIDIAEmbedding(model="NV-Embed-QA", truncate="END") Settings.embed_model = embedder Settings.llm = llm

In [ ]

已复制！

class Song(BaseModel):
    """Data model for a song."""

    # Title of the song.
    title: str
    # Track length in whole seconds.
    length_seconds: int

class Album(BaseModel):
    """Data model for an album."""

    # Album title.
    name: str
    # Performing artist.
    artist: str
    # Tracks on the album; each entry is a Song.
    songs: List[Song]
class Song(BaseModel): """Data model for a song.""" title: str length_seconds: int class Album(BaseModel): """Data model for an album.""" name: str artist: str songs: List[Song]

In [ ]

已复制！





# Prompt with a single template variable, {movie_name}.
prompt_template_str = (
    "Generate an example album, with an artist and a list of songs. "
    "Using the movie {movie_name} as inspiration."
)

# Passing output_cls makes Album the target type of the program's output.
program = LLMTextCompletionProgram.from_defaults(
    prompt_template_str=prompt_template_str,
    output_cls=Album,
    verbose=True,
)
prompt_template_str = """\ Generate an example album, with an artist and a list of songs. \ Using the movie {movie_name} as inspiration.\ """ program = LLMTextCompletionProgram.from_defaults( output_cls=Album, prompt_template_str=prompt_template_str, verbose=True, )

运行程序获取结构化输出。

In [ ]

已复制！

output = program(movie_name="The Shining")
output = program(movie_name="The Shining")

输出是一个有效的 Pydantic 对象，我们可以用它来调用函数/API。

In [ ]

已复制！

output
output

In [ ]

已复制！

from llama_index.core.output_parsers import PydanticOutputParser

# Same program as before, but the target type is supplied through an explicit
# PydanticOutputParser instead of the output_cls argument.
program = LLMTextCompletionProgram.from_defaults(
    prompt_template_str=prompt_template_str,
    output_parser=PydanticOutputParser(output_cls=Album),
    verbose=True,
)
from llama_index.core.output_parsers import PydanticOutputParser program = LLMTextCompletionProgram.from_defaults( output_parser=PydanticOutputParser(output_cls=Album), prompt_template_str=prompt_template_str, verbose=True, )

In [ ]

已复制！

output = program(movie_name="Lord of the Rings")
output
output = program(movie_name="Lord of the Rings") output

定义自定义输出解析器¶

有时您可能希望以自己的方式将输出解析为 JSON 对象。

In [ ]

已复制！





from llama_index.core.output_parsers import ChainableOutputParser


class CustomAlbumOutputParser(ChainableOutputParser):
    """Parse free-text LLM output into an ``Album``.

    Expected layout, after blank lines are dropped::

        <album_name>, <album_artist>
        <song_title>, <song_length_in_seconds>
        ...

    The header is located as the line directly above the first song line
    rather than at a fixed index, so the parser works both when the model
    follows the format exactly and when it prepends an introductory
    sentence. Lines after the run of song lines (e.g. a closing remark) are
    ignored.
    """

    def __init__(self, verbose: bool = False):
        # When True, ``parse`` prints the raw LLM output before parsing it.
        self.verbose = verbose

    @staticmethod
    def _split_song(line: str):
        """Return ``(title, seconds)`` for a song line, else ``None``.

        Splits on the last comma so that titles may contain commas.
        """
        title, sep, seconds = line.rpartition(",")
        title = title.strip()
        if not sep or not title:
            return None
        try:
            return title, int(seconds.strip())
        except ValueError:
            return None

    def parse(self, output: str) -> Album:
        """Parse ``output`` into an ``Album``.

        Raises:
            ValueError: if no song line is found, or if no
                ``<album_name>, <album_artist>`` line precedes the first song.
        """
        if self.verbose:
            print(f"> Raw output: {output}")

        stripped = (line.strip() for line in output.split("\n"))
        lines = [line for line in stripped if line]
        parsed = [self._split_song(line) for line in lines]

        first_song = next(
            (i for i, song in enumerate(parsed) if song is not None), None
        )
        if first_song is None or first_song == 0:
            raise ValueError(
                "Expected '<album_name>, <album_artist>' followed by "
                "'<song_title>, <song_length_in_seconds>' lines."
            )

        name, sep, artist = lines[first_song - 1].partition(",")
        if not sep:
            raise ValueError(
                f"Album header is not '<album_name>, <album_artist>': "
                f"{lines[first_song - 1]!r}"
            )

        songs = []
        for song in parsed[first_song:]:
            if song is None:
                # First non-song line after the track list: stop here.
                break
            title, length_seconds = song
            songs.append(Song(title=title, length_seconds=length_seconds))

        return Album(name=name.strip(), artist=artist.strip(), songs=songs)
from llama_index.core.output_parsers import ChainableOutputParser class CustomAlbumOutputParser(ChainableOutputParser): """自定义 Album 输出解析器。 假定第一行是专辑名称和艺术家。 假定后续每一行是一首歌曲。 """ def __init__(self, verbose: bool = False): self.verbose = verbose def parse(self, output: str) -> Album: """解析输出。""" if self.verbose: print(f"> Raw output: {output}") lines = output.split("\n") lines = list(filter(None, (line.strip() for line in lines))) name, artist = lines[1].split(",") songs = [] for i in range(2, len(lines)): title, length_seconds = lines[i].split(",") songs.append(Song(title=title, length_seconds=length_seconds)) return Album(name=name, artist=artist, songs=songs)

In [ ]

已复制！





# Prompt that spells out the plain-text layout the custom parser expects:
# one "<album_name>, <album_artist>" line, then one line per song.
prompt_template_str = """\
Generate an example album, with an artist and a list of songs. \
Using the movie {movie_name} as inspiration.\

Return answer in following format.
The first line is:
<album_name>, <album_artist>
Every subsequent line is a song with format:
<song_title>, <song_length_in_seconds>

"""

# Both the custom parser and the target class are passed to the program.
program = LLMTextCompletionProgram.from_defaults(
    prompt_template_str=prompt_template_str,
    output_cls=Album,
    output_parser=CustomAlbumOutputParser(verbose=True),
    verbose=True,
)
prompt_template_str = """\ Generate an example album, with an artist and a list of songs. \ Using the movie {movie_name} as inspiration.\ Return answer in following format. The first line is, Every subsequent line is a song with format, """ program = LLMTextCompletionProgram.from_defaults( output_parser=CustomAlbumOutputParser(verbose=True), output_cls=Album, prompt_template_str=prompt_template_str, verbose=True, )

In [ ]

已复制！

output = program(movie_name="The Dark Knight")
print(output)
output = program(movie_name="The Dark Knight") print(output)

函数调用程序用于结构化提取¶

本指南展示了如何使用我们的 FunctionCallingProgram 进行结构化数据提取。给定一个支持函数调用的 LLM 和一个输出 Pydantic 类，生成一个结构化的 Pydantic 对象。

在下面的示例中，我们将向您展示将数据提取到 Album 对象（可以包含 Song 对象列表）的不同方法。

注意：FunctionCallingProgram 仅适用于原生支持函数调用的 LLM，其原理是将 Pydantic 对象的 schema 作为某个工具的“工具参数”传给模型。对于其他所有 LLM，请使用我们的 LLMTextCompletionProgram，它会直接用文本提示模型，以获得结构化输出。

模型中不包含 docstring¶

In [ ]

已复制！

llm = NVIDIA(model="meta/llama-3.1-8b-instruct")
llm = NVIDIA(model="meta/llama-3.1-8b-instruct")

In [ ]

已复制！

# Deliberately has no docstring: this cell demonstrates the
# "model without docstring" case.
class Song(BaseModel):
    # Title of the song.
    title: str
    # Track length in whole seconds.
    length_seconds: int

# Deliberately has no docstring: this cell demonstrates the
# "model without docstring" case.
class Album(BaseModel):
    # Album title.
    name: str
    # Performing artist.
    artist: str
    # Tracks on the album; each entry is a Song.
    songs: List[Song]
class Song(BaseModel): title: str length_seconds: int class Album(BaseModel): name: str artist: str songs: List[Song]

定义 pydantic 程序

In [ ]

已复制！





# Prompt with a single template variable, {movie_name}.
prompt_template_str = (
    "Generate an example album, with an artist and a list of songs. "
    "Using the movie {movie_name} as inspiration."
)

# Function-calling variant: the LLM is passed explicitly and Album is the
# target type of the program's output.
program = FunctionCallingProgram.from_defaults(
    llm=llm,
    prompt_template_str=prompt_template_str,
    output_cls=Album,
    verbose=True,
)
prompt_template_str = """\ Generate an example album, with an artist and a list of songs. \ Using the movie {movie_name} as inspiration.\ """ program = FunctionCallingProgram.from_defaults( output_cls=Album, prompt_template_str=prompt_template_str, verbose=True, llm=llm, )

运行程序获取结构化输出。

In [ ]

已复制！

output = program(
    movie_name="The Shining", description="Data model for an album."
)
output = program( movie_name="The Shining", description="Data model for an album." )

模型中包含 docstring¶

In [ ]

已复制！

class Song(BaseModel):
    """Data model for a song."""

    # Title of the song.
    title: str
    # Track length in whole seconds.
    length_seconds: int

class Album(BaseModel):
    """Data model for an album."""

    # Album title.
    name: str
    # Performing artist.
    artist: str
    # Tracks on the album; each entry is a Song.
    songs: List[Song]
class Song(BaseModel): """Data model for a song.""" title: str length_seconds: int class Album(BaseModel): """Data model for an album.""" name: str artist: str songs: List[Song]

In [ ]

已复制！





# Prompt with a single template variable, {movie_name}.
prompt_template_str = (
    "Generate an example album, with an artist and a list of songs. "
    "Using the movie {movie_name} as inspiration."
)

# Rebuild the program so it picks up the Album/Song classes that now carry
# docstrings.
program = FunctionCallingProgram.from_defaults(
    llm=llm,
    prompt_template_str=prompt_template_str,
    output_cls=Album,
    verbose=True,
)
prompt_template_str = """\ Generate an example album, with an artist and a list of songs. \ Using the movie {movie_name} as inspiration.\ """ program = FunctionCallingProgram.from_defaults( output_cls=Album, prompt_template_str=prompt_template_str, verbose=True, llm=llm, )

运行程序获取结构化输出。

In [ ]

已复制！

output = program(movie_name="The Shining")
output = program(movie_name="The Shining")

输出是一个有效的 Pydantic 对象，我们可以用它来调用函数/API。

In [ ]

已复制！

output
output

Langchain 输出解析¶

下载数据

In [ ]

已复制！

!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
!mkdir -p 'data/paul_graham/' !wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

加载文档，构建 VectorStoreIndex¶

In [ ]

已复制！

import logging
import sys

# Send INFO-and-above records from the root logger to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
# Attach a second stdout handler to the root logger.
# NOTE(review): if basicConfig above installed its own handler, every record
# will be printed twice; confirm whether that is intended.
logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from IPython.display import Markdown, display
import logging import sys logging.basicConfig(stream=sys.stdout, level=logging.INFO) logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout)) from llama_index.core import VectorStoreIndex, SimpleDirectoryReader from IPython.display import Markdown, display

In [ ]

已复制！

# load documents
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
# load documents documents = SimpleDirectoryReader("./data/paul_graham/").load_data()

In [ ]

已复制！

# Chunking is configured through an explicit node parser. A bare
# `chunk_size=512` keyword on `from_documents` is not consumed by the
# Settings-based llama-index-core API, so the documents would be split with
# the default chunk size instead of 512.
from llama_index.core.node_parser import SentenceSplitter

index = VectorStoreIndex.from_documents(
    documents,
    transformations=[SentenceSplitter(chunk_size=512)],
)
index = VectorStoreIndex.from_documents(documents, chunk_size=512)

定义查询 + Langchain 输出解析器¶

In [ ]

已复制！

from llama_index.core.output_parsers import LangchainOutputParser
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from llama_index.core.output_parsers import LangchainOutputParser from langchain.output_parsers import StructuredOutputParser, ResponseSchema

定义自定义问答和精炼提示

In [ ]

已复制！





# One ResponseSchema per field the structured answer should contain,
# built from (name, description) pairs.
response_schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in (
        (
            "Education",
            "Describes the author's educational experience/background.",
        ),
        (
            "Work",
            "Describes the author's work experience/background.",
        ),
    )
]
response_schemas = [ ResponseSchema( name="Education", description=( "Describes the author's educational experience/background." ), ), ResponseSchema( name="Work", description="Describes the author's work experience/background.", ), ]

In [ ]

已复制！

# Build the LangChain structured parser from the schemas, then wrap it in
# LlamaIndex's LangchainOutputParser adapter.
lc_output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
output_parser = LangchainOutputParser(lc_output_parser)
lc_output_parser = StructuredOutputParser.from_response_schemas( response_schemas ) output_parser = LangchainOutputParser(lc_output_parser)

In [ ]

已复制！

from llama_index.core.prompts.default_prompts import (
    DEFAULT_TEXT_QA_PROMPT_TMPL,
)

# take a look at the new QA template!
fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL)
print(fmt_qa_tmpl)
from llama_index.core.prompts.default_prompts import ( DEFAULT_TEXT_QA_PROMPT_TMPL, ) # take a look at the new QA template! fmt_qa_tmpl = output_parser.format(DEFAULT_TEXT_QA_PROMPT_TMPL) print(fmt_qa_tmpl)

查询索引¶

In [ ]

已复制！





# Query the index with the NVIDIA LLM.
# NOTE(review): `output_parser` is built earlier in this notebook but is not
# passed to the LLM or the query engine here, so this query does not appear
# to use the structured-output format; confirm whether it should be attached.
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query("What are a few things the author did growing up?")
query_engine = index.as_query_engine( llm=llm, ) response = query_engine.query( "What are a few things the author did growing up?", )

LLM Pydantic 程序 - NVIDIA¶

提取到 Album 类¶

定义自定义输出解析器¶

函数调用程序用于结构化提取¶

模型中不包含 docstring¶

模型中包含 docstring¶

Langchain 输出解析¶

加载文档，构建 VectorStoreIndex¶

定义查询 + Langchain 输出解析器¶

查询索引¶

提取到 `Album` 类¶