基类: BoxReaderBase
一个使用 Box AI Extract 从 Box 文件加载数据的读取器类。
该类继承自 BoxReaderBase 类,专门用于使用 Box AI Extract 处理 Box 文件中的数据。它利用提供的 BoxClient 对象与 Box API 交互,并根据指定的 AI 提示提取数据。
属性
名称 |
类型 |
描述 |
_box_client |
BoxClient
|
用于与 Box API 交互的已认证 Box 客户端对象。
|
源代码位于 llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxReaderAIExtraction/base.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
class BoxReaderAIExtract(BoxReaderBase):
    """
    Reader that turns Box AI Extract results into Document objects.

    Built on top of ``BoxReaderBase``, this reader submits an AI prompt to
    Box AI Extract for a set of Box files and wraps each file's response,
    together with that file's metadata, in a ``Document``.

    Attributes:
        _box_client (BoxClient): An authenticated Box client object used
            for interacting with the Box API.
    """

    _box_client: BoxClient

    @classmethod
    def class_name(cls) -> str:
        """Return the name identifying this reader class."""
        return "BoxReaderAIExtract"

    def __init__(self, box_client: BoxClient):
        """
        Create the reader.

        Args:
            box_client (BoxClient): The Box client, forwarded unchanged to
                the base class initializer.
        """
        super().__init__(box_client=box_client)

    def load_data(
        self,
        ai_prompt: str,
        file_ids: Optional[List[str]] = None,
        folder_id: Optional[str] = None,
        is_recursive: bool = False,
    ) -> List[Document]:
        """
        Run Box AI Extract over Box files and return one Document per file.

        The target files are resolved from ``file_ids`` or, failing that,
        from ``folder_id``. When neither is supplied, an empty file list is
        handed to the extraction step. Each resulting document's text is
        the AI response (an empty string when the response is empty or
        missing); the prompt and the raw response are also stored in the
        document metadata under ``ai_prompt`` and ``ai_response``.

        Args:
            ai_prompt (str): The AI prompt that specifies what data to
                extract from the files.
            file_ids (Optional[List[str]], optional): Box file IDs to
                extract data from. When given, ``folder_id`` is ignored.
                Defaults to None.
            folder_id (Optional[str], optional): ID of the Box folder whose
                files should be processed. Only consulted when ``file_ids``
                is None. Defaults to None.
            is_recursive (bool, optional): Forwarded to the folder lookup;
                when True, files in sub-folders are included as well.
                Defaults to False.

        Returns:
            List[Document]: Documents carrying the extracted data and the
                file metadata.
        """
        # check that the box client is authenticated before doing any work
        box_check_connection(self._box_client)

        # Resolve the files to process; explicit file IDs take precedence
        # over a folder.
        candidates: List[File] = []
        if file_ids is not None:
            candidates.extend(
                get_box_files_details(box_client=self._box_client, file_ids=file_ids)
            )
        elif folder_id is not None:
            folder_files = get_box_folder_files_details(
                box_client=self._box_client,
                folder_id=folder_id,
                is_recursive=is_recursive,
            )
            candidates.extend(folder_files)

        extracted_files = get_files_ai_extract_data(
            box_client=self._box_client,
            box_files=candidates,
            ai_prompt=ai_prompt,
        )

        documents: List[Document] = []
        for box_file in extracted_files:
            document = box_file_to_llama_document(box_file)
            response = box_file.ai_response
            # The text falls back to "" for an empty/missing response,
            # while the metadata keeps the raw value.
            document.text = response or ""
            document.metadata["ai_prompt"] = box_file.ai_prompt
            document.metadata["ai_response"] = response
            documents.append(document)
        return documents

    def load_resource(self, box_file_id: str, ai_prompt: str) -> List[Document]:
        """
        Extract data from a single Box file.

        Convenience wrapper that calls ``load_data`` with a one-element
        file ID list.

        Args:
            box_file_id (str): ID of the Box file to process.
            ai_prompt (str): The AI prompt that specifies what data to
                extract from the file.

        Returns:
            List[Document]: The documents ``load_data`` returns for that
                file.
        """
        single_file = [box_file_id]
        return self.load_data(ai_prompt=ai_prompt, file_ids=single_file)
|
load_data(ai_prompt: str, file_ids: Optional[List[str]] = None, folder_id: Optional[str] = None, is_recursive: bool = False) -> List[Document]
使用 Box AI 从 Box 文件中提取数据并创建 Document 对象。
此方法利用 Box AI Extract 功能,根据提供的 AI 提示从指定的 Box 文件中提取数据。然后,它创建包含提取数据及文件元数据的 Document 对象。
参数
名称 |
类型 |
描述 |
默认值 |
ai_prompt
|
str
|
指定要从文件中提取哪些数据的 AI 提示。
|
必需
|
file_ids
|
Optional[List[str]]
|
要从中提取数据的 Box 文件 ID 列表。如果提供,folder_id 将被忽略。默认为 None。
|
None
|
folder_id
|
Optional[str]
|
要从中提取数据的 Box 文件夹 ID。如果提供,并且 is_recursive 设置为 True,则也会从子文件夹中检索数据。默认为 None。
|
None
|
is_recursive
|
bool
|
如果为 True 且 folder_id 提供,则从指定文件夹内的子文件夹中提取数据。默认为 False。
|
False
|
返回值
类型 |
描述 |
List[Document]
|
List[Document]: 包含提取数据和文件元数据的 Document 对象列表。
|
源代码位于 llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxReaderAIExtraction/base.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
def load_data(
    self,
    ai_prompt: str,
    file_ids: Optional[List[str]] = None,
    folder_id: Optional[str] = None,
    is_recursive: bool = False,
) -> List[Document]:
    """
    Run Box AI Extract over Box files and return one Document per file.

    The target files are resolved from ``file_ids`` or, failing that,
    from ``folder_id``. When neither is supplied, an empty file list is
    handed to the extraction step. Each resulting document's text is the
    AI response (an empty string when the response is empty or missing);
    the prompt and the raw response are also stored in the document
    metadata under ``ai_prompt`` and ``ai_response``.

    Args:
        ai_prompt (str): The AI prompt that specifies what data to
            extract from the files.
        file_ids (Optional[List[str]], optional): Box file IDs to extract
            data from. When given, ``folder_id`` is ignored.
            Defaults to None.
        folder_id (Optional[str], optional): ID of the Box folder whose
            files should be processed. Only consulted when ``file_ids``
            is None. Defaults to None.
        is_recursive (bool, optional): Forwarded to the folder lookup;
            when True, files in sub-folders are included as well.
            Defaults to False.

    Returns:
        List[Document]: Documents carrying the extracted data and the
            file metadata.
    """
    # check that the box client is authenticated before doing any work
    box_check_connection(self._box_client)

    # Resolve the files to process; explicit file IDs take precedence
    # over a folder.
    candidates: List[File] = []
    if file_ids is not None:
        candidates.extend(
            get_box_files_details(box_client=self._box_client, file_ids=file_ids)
        )
    elif folder_id is not None:
        folder_files = get_box_folder_files_details(
            box_client=self._box_client,
            folder_id=folder_id,
            is_recursive=is_recursive,
        )
        candidates.extend(folder_files)

    extracted_files = get_files_ai_extract_data(
        box_client=self._box_client,
        box_files=candidates,
        ai_prompt=ai_prompt,
    )

    documents: List[Document] = []
    for box_file in extracted_files:
        document = box_file_to_llama_document(box_file)
        response = box_file.ai_response
        # The text falls back to "" for an empty/missing response, while
        # the metadata keeps the raw value.
        document.text = response or ""
        document.metadata["ai_prompt"] = box_file.ai_prompt
        document.metadata["ai_response"] = response
        documents.append(document)
    return documents
|
load_resource(box_file_id: str, ai_prompt: str) -> List[Document]
从特定资源加载数据。
参数
名称 |
类型 |
描述 |
默认值 |
box_file_id
|
str
|
要加载的 Box 文件的 ID。
|
必需
|
ai_prompt
|
str
|
指定要从文件中提取哪些数据的 AI 提示。
|
必需
|
返回值
类型 |
描述 |
List[Document]
|
List[Document]: 从资源加载的文档列表。
|
源代码位于 llama-index-integrations/readers/llama-index-readers-box/llama_index/readers/box/BoxReaderAIExtraction/base.py
115
116
117
118
119
120
121
122
123
124
125
def load_resource(self, box_file_id: str, ai_prompt: str) -> List[Document]:
    """
    Extract data from a single Box file.

    Convenience wrapper that calls ``load_data`` with a one-element file
    ID list.

    Args:
        box_file_id (str): ID of the Box file to process.
        ai_prompt (str): The AI prompt that specifies what data to
            extract from the file.

    Returns:
        List[Document]: The documents ``load_data`` returns for that file.
    """
    single_file = [box_file_id]
    return self.load_data(ai_prompt=ai_prompt, file_ids=single_file)
|