跳到内容

索引

基础读取器类。

BaseReader #

基类:ABC

用于从目录加载数据的工具。

源码位于 llama-index-core/llama_index/core/readers/base.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class BaseReader(ABC):  # pragma: no cover
    """Base class offering utilities for loading data from a directory.

    ``lazy_load_data`` is the single primitive; the eager and asynchronous
    entry points below are all expressed in terms of it.
    """

    def lazy_load_data(self, *args: Any, **load_kwargs: Any) -> Iterable[Document]:
        """Lazily load data from the input directory.

        Raises:
            NotImplementedError: Always in this base implementation; the
                message names the concrete reader class.
        """
        reader_name = self.__class__.__name__
        raise NotImplementedError(
            f"{reader_name} does not provide lazy_load_data method currently"
        )

    async def alazy_load_data(
        self, *args: Any, **load_kwargs: Any
    ) -> Iterable[Document]:
        """Asynchronous counterpart of ``lazy_load_data``."""
        # Not a native async implementation: the synchronous method is simply
        # dispatched through asyncio.to_thread. Subclasses may override this
        # with a real async implementation.
        sync_loader = self.lazy_load_data
        return await asyncio.to_thread(sync_loader, *args, **load_kwargs)

    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Load data from the input directory as a list.

        All arguments are forwarded to ``lazy_load_data`` and the resulting
        iterable is fully consumed.
        """
        lazy_documents = self.lazy_load_data(*args, **load_kwargs)
        return list(lazy_documents)

    async def aload_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Asynchronous counterpart of ``load_data``."""
        # Same approach as alazy_load_data: run the sync method via to_thread.
        sync_loader = self.load_data
        return await asyncio.to_thread(sync_loader, *args, **load_kwargs)

    def load_langchain_documents(self, **load_kwargs: Any) -> List["LCDocument"]:
        """Load data and convert every document to LangChain format.

        Keyword arguments are forwarded to ``load_data``; each loaded document
        is converted with its ``to_langchain_format`` method.
        """
        return [
            document.to_langchain_format()
            for document in self.load_data(**load_kwargs)
        ]

lazy_load_data #

lazy_load_data(*args: Any, **load_kwargs: Any) -> Iterable[Document]

从输入目录延迟加载数据。

源码位于 llama-index-core/llama_index/core/readers/base.py
22
23
24
25
26
def lazy_load_data(self, *args: Any, **load_kwargs: Any) -> Iterable[Document]:
    """Lazily load data from the input directory.

    Raises:
        NotImplementedError: Always in this implementation; the message names
            the class of ``self``.
    """
    reader_name = self.__class__.__name__
    raise NotImplementedError(
        f"{reader_name} does not provide lazy_load_data method currently"
    )

alazy_load_data 异步 #

alazy_load_data(*args: Any, **load_kwargs: Any) -> Iterable[Document]

从输入目录延迟加载数据。

源码位于 llama-index-core/llama_index/core/readers/base.py
28
29
30
31
32
33
async def alazy_load_data(
    self, *args: Any, **load_kwargs: Any
) -> Iterable[Document]:
    """Asynchronous counterpart of ``lazy_load_data``."""
    # Not a native async implementation: the synchronous method is simply
    # dispatched through asyncio.to_thread. Subclasses may override this
    # with a real async implementation.
    sync_loader = self.lazy_load_data
    return await asyncio.to_thread(sync_loader, *args, **load_kwargs)

load_data #

load_data(*args: Any, **load_kwargs: Any) -> List[Document]

从输入目录加载数据。

源码位于 llama-index-core/llama_index/core/readers/base.py
35
36
37
def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
    """Load data from the input directory as a list.

    All arguments are forwarded to ``lazy_load_data`` and the resulting
    iterable is fully consumed.
    """
    lazy_documents = self.lazy_load_data(*args, **load_kwargs)
    return list(lazy_documents)

aload_data 异步 #

aload_data(*args: Any, **load_kwargs: Any) -> List[Document]

从输入目录加载数据。

源码位于 llama-index-core/llama_index/core/readers/base.py
39
40
41
async def aload_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
    """Asynchronous counterpart of ``load_data``."""
    # The synchronous load_data is run via asyncio.to_thread and its result
    # is returned unchanged.
    sync_loader = self.load_data
    return await asyncio.to_thread(sync_loader, *args, **load_kwargs)

load_langchain_documents #

load_langchain_documents(**load_kwargs: Any) -> List[Document]

加载 LangChain 文档格式的数据。

源码位于 llama-index-core/llama_index/core/readers/base.py
43
44
45
46
def load_langchain_documents(self, **load_kwargs: Any) -> List["LCDocument"]:
    """Load data and convert every document to LangChain format.

    Keyword arguments are forwarded to ``load_data``; each loaded document
    is converted with its ``to_langchain_format`` method.
    """
    return [
        document.to_langchain_format()
        for document in self.load_data(**load_kwargs)
    ]

BasePydanticReader #

基类:BaseReader, BaseComponent

带 Pydantic 的可序列化数据加载器。

参数

名称 类型 描述 默认值
is_remote bool 数据是从远程 API 加载,还是从本地文件加载。 False

源码位于 llama-index-core/llama_index/core/readers/base.py
49
50
51
52
53
54
55
56
class BasePydanticReader(BaseReader, BaseComponent):
    """Serializable data loader with Pydantic."""

    # Model configuration built with ConfigDict, with arbitrary_types_allowed
    # switched on. NOTE(review): presumably pydantic's ConfigDict, which would
    # let fields use types pydantic cannot validate natively — confirm against
    # the file's imports.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Flag recording whether the data comes from a remote API or a local file;
    # defaults to False.
    is_remote: bool = Field(
        default=False,
        description="Whether the data is loaded from a remote API or a local file.",
    )