Evaporate DF 程序。
给定一组字段,从一组节点中提取一个 DataFrame。每个节点对应于 DataFrame 中的一行 - 行中的每个值对应于一个字段值。
源代码位于 llama-index-integrations/program/llama-index-program-evaporate/llama_index/program/evaporate/base.py
output_cls 属性
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
class DFEvaporateProgram(BaseEvaporateProgram[DataFrameRowsOnly]):
    """
    Evaporate program that produces dataframe rows.

    For a configured set of fields, a dataframe is extracted from a set of
    nodes: every node becomes one row, and every value in that row is the
    value extracted for one field.
    """

    def fit(
        self,
        nodes: List[BaseNode],
        field: str,
        field_context: Optional[Any] = None,
        expected_output: Optional[Any] = None,
        inplace: bool = True,
    ) -> str:
        """
        Synthesize the python code that extracts ``field`` from ``nodes``.

        Args:
            nodes: Nodes handed to the extractor.
            field: Name of the field to synthesize a function for.
            field_context: Accepted for interface compatibility; not read
                by this method.
            expected_output: Accepted for interface compatibility; not read
                by this method.
            inplace: When true, the result is also stored in
                ``self._field_fns`` under ``field``.

        Returns:
            Whatever ``self._extractor.extract_fn_from_nodes`` returned
            (annotated as ``str``).
        """
        synthesized = self._extractor.extract_fn_from_nodes(nodes, field)
        logger.debug(f"Extracted function: {synthesized}")
        if not inplace:
            return synthesized
        # Remember the function so that ``__call__`` can look it up later.
        self._field_fns[field] = synthesized
        return synthesized

    def _inference(
        self, nodes: List[BaseNode], fn_str: str, field_name: str
    ) -> List[Any]:
        """
        Run the python code ``fn_str`` on ``nodes`` and return the results.

        The work is delegated to ``self._extractor.run_fn_on_nodes``; the
        results are logged at debug level before being returned unchanged.
        """
        outputs = self._extractor.run_fn_on_nodes(nodes, fn_str, field_name)
        logger.debug(f"Results: {outputs}")
        return outputs

    @property
    def output_cls(self) -> Type[DataFrameRowsOnly]:
        """Output class (always ``DataFrameRowsOnly``)."""
        return DataFrameRowsOnly

    def __call__(self, *args: Any, **kwds: Any) -> DataFrameRowsOnly:
        """
        Call evaporate on inference data.

        The input is taken from keyword arguments only: ``nodes`` is used
        as given and takes precedence; otherwise every entry of ``texts``
        is wrapped in a ``TextNode``. Positional arguments are not read.

        The function for each field is read from ``self._field_fns``, so it
        has to be stored there beforehand (``fit`` does this when
        ``inplace`` is true).

        Raises:
            ValueError: If neither ``nodes`` nor ``texts`` is supplied.
        """
        # TODO: either specify `nodes` or `texts` in kwds
        if "nodes" not in kwds and "texts" not in kwds:
            raise ValueError("Must provide either `nodes` or `texts`.")
        if "nodes" in kwds:
            nodes = kwds["nodes"]
        else:
            nodes = [TextNode(text=t) for t in kwds["texts"]]

        # One column per field, each produced by that field's function.
        columns_by_field = {
            name: self._inference(nodes, self._field_fns[name], name)
            for name in self._fields
        }
        frame = pd.DataFrame(columns_by_field, columns=self._fields)

        # convert pd.DataFrame to DataFrameRowsOnly
        row_objs = [
            DataFrameRow(row_values=list(row_values))
            for row_values in frame.values
        ]
        return DataFrameRowsOnly(rows=row_objs)
|
输出类。
output_cls: Type[DataFrameRowsOnly]
给定输入节点和字段,合成 Python 代码。
fit(nodes: List[BaseNode], field: str, field_context: Optional[Any] = None, expected_output: Optional[Any] = None, inplace: bool = True) -> str
返回顶部
output_cls 属性
136
137
138
139
140
141
142
143
144
145
146
147
148
def fit(
    self,
    nodes: List[BaseNode],
    field: str,
    field_context: Optional[Any] = None,
    expected_output: Optional[Any] = None,
    inplace: bool = True,
) -> str:
    """
    Synthesize the python code that extracts ``field`` from ``nodes``.

    Args:
        nodes: Nodes handed to the extractor.
        field: Name of the field to synthesize a function for.
        field_context: Accepted for interface compatibility; not read by
            this method.
        expected_output: Accepted for interface compatibility; not read by
            this method.
        inplace: When true, the result is also stored in
            ``self._field_fns`` under ``field``.

    Returns:
        Whatever ``self._extractor.extract_fn_from_nodes`` returned
        (annotated as ``str``).
    """
    synthesized = self._extractor.extract_fn_from_nodes(nodes, field)
    logger.debug(f"Extracted function: {synthesized}")
    if not inplace:
        return synthesized
    # Keep the function on the instance, keyed by its field name.
    self._field_fns[field] = synthesized
    return synthesized
|