跳到内容

Pydantic

PydanticProgramExtractor #

基类: BaseExtractor, Generic[Model]

Pydantic 程序提取器。

使用 LLM 提取出一个 Pydantic 对象，并以字典形式返回该对象的属性。

参数

名称 类型 描述 默认值
program BasePydanticProgram[TypeVar]

用于执行提取的 Pydantic 程序。

必需
input_key str

用作程序输入的键（程序的模板字符串必须包含此键对应的占位符）。

'input'
extract_template_str str

用于提取的模板。

'Here is the content of the section:\n----------------\n{context_str}\n----------------\nGiven the contextual information, extract out a {class_name} object.'
源代码位于 llama-index-core/llama_index/core/extractors/metadata_extractors.py
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
class PydanticProgramExtractor(BaseExtractor, Generic[Model]):
    """
    Pydantic program extractor.

    Uses an LLM to extract out a Pydantic object. Return attributes of that object
    in a dictionary.

    """

    # Program awaited once per node; its result is dumped to a dict.
    program: SerializeAsAny[BasePydanticProgram[Model]] = Field(
        ..., description="Pydantic program to extract."
    )
    # Keyword name under which the rendered prompt is passed to ``program.acall``.
    input_key: str = Field(
        default="input",
        description=(
            "Key to use as input to the program (the program "
            "template string must expose this key)."
        ),
    )
    # Prompt template; formatted with ``context_str`` and ``class_name``.
    extract_template_str: str = Field(
        default=DEFAULT_EXTRACT_TEMPLATE_STR,
        description="Template to use for extraction.",
    )

    @classmethod
    def class_name(cls) -> str:
        """Return the fixed name string reported for this class."""
        return "PydanticModelExtractor"

    async def _acall_program(self, node: BaseNode) -> Dict[str, Any]:
        """
        Run the program on a single node and return the result as a dict.

        Returns an empty dict when ``is_text_node_only`` is set and ``node``
        is not a ``TextNode``. Otherwise the node content and the program's
        output class name are substituted into ``extract_template_str``, the
        rendered prompt is passed to the program under ``input_key``, and the
        returned object's ``model_dump()`` is returned.
        """
        if self.is_text_node_only and not isinstance(node, TextNode):
            return {}

        node_text = node.get_content(metadata_mode=self.metadata_mode)
        target_name = self.program.output_cls.__name__
        prompt = self.extract_template_str.format(
            context_str=node_text,
            class_name=target_name,
        )

        program_kwargs = {self.input_key: prompt}
        extracted = await self.program.acall(**program_kwargs)
        # A single object is expected here; a list has no ``model_dump``.
        assert not isinstance(extracted, list)

        return extracted.model_dump()

    async def aextract(self, nodes: Sequence[BaseNode]) -> List[Dict]:
        """
        Run the program over every node and collect the resulting dicts.

        One ``_acall_program`` coroutine is created per node and the batch is
        handed to ``run_jobs`` together with ``show_progress`` and
        ``num_workers``; whatever ``run_jobs`` returns is returned unchanged.
        """
        pending = [self._acall_program(node) for node in nodes]

        results: List[Dict] = await run_jobs(
            pending, show_progress=self.show_progress, workers=self.num_workers
        )

        return results

aextract async #

aextract(nodes: Sequence[BaseNode]) -> List[Dict]

对每个节点运行 Pydantic 程序，并返回各节点的提取结果（每个节点对应一个字典）。

源代码位于 llama-index-core/llama_index/core/extractors/metadata_extractors.py
514
515
516
517
518
519
520
521
522
523
524
async def aextract(self, nodes: Sequence[BaseNode]) -> List[Dict]:
    """
    Run the program over every node and collect the resulting dicts.

    One ``_acall_program`` coroutine is created per node and the batch is
    handed to ``run_jobs`` together with ``show_progress`` and
    ``num_workers``; whatever ``run_jobs`` returns is returned unchanged.
    """
    pending = [self._acall_program(node) for node in nodes]

    results: List[Dict] = await run_jobs(
        pending, show_progress=self.show_progress, workers=self.num_workers
    )

    return results