class DFEvaporateProgram(BaseEvaporateProgram[DataFrameRowsOnly]):
    """Evaporate DF program.

    Given a set of fields, extracts a dataframe from a set of nodes.
    Each node corresponds to a row in the dataframe - each value in the row
    corresponds to a field value.

    """

    def fit(
        self,
        nodes: List[BaseNode],
        field: str,
        field_context: Optional[Any] = None,
        expected_output: Optional[Any] = None,
        inplace: bool = True,
    ) -> str:
        """Given the input Nodes and a field, synthesize the python code.

        Args:
            nodes: Nodes handed to the extractor to derive the function from.
            field: Name of the field the function should extract.
            field_context: Accepted but not used by this implementation.
            expected_output: Accepted but not used by this implementation.
            inplace: When true, store the synthesized function under ``field``
                in ``self._field_fns``.

        Returns:
            The function returned by the extractor's
            ``extract_fn_from_nodes``.
        """
        fn = self._extractor.extract_fn_from_nodes(nodes, field)
        logger.debug("Extracted function: %s", fn)
        if inplace:
            self._field_fns[field] = fn
        return fn

    def _inference(
        self, nodes: List[BaseNode], fn_str: str, field_name: str
    ) -> List[Any]:
        """Given the input, call the python code and return the result.

        Args:
            nodes: Nodes to run the function on.
            fn_str: Function to run, as previously produced by ``fit``.
            field_name: Name of the field being extracted.

        Returns:
            Whatever the extractor's ``run_fn_on_nodes`` returns for these
            arguments.
        """
        results = self._extractor.run_fn_on_nodes(nodes, fn_str, field_name)
        logger.debug("Results: %s", results)
        return results

    @property
    def output_cls(self) -> Type[DataFrameRowsOnly]:
        """Output class."""
        return DataFrameRowsOnly

    def __call__(self, *args: Any, **kwds: Any) -> DataFrameRowsOnly:
        """Call evaporate on inference data.

        Args:
            *args: Ignored.
            **kwds: Must contain either ``nodes`` (used as-is) or ``texts``
                (each text is wrapped in a ``TextNode``). ``nodes`` takes
                precedence when both are given.

        Returns:
            A ``DataFrameRowsOnly`` holding one ``DataFrameRow`` per row of
            the assembled dataframe, with values ordered as ``self._fields``.

        Raises:
            ValueError: If neither ``nodes`` nor ``texts`` is provided.
            KeyError: If a field in ``self._fields`` has no entry in
                ``self._field_fns``.
        """
        # TODO: either specify `nodes` or `texts` in kwds
        if "nodes" in kwds:
            nodes = kwds["nodes"]
        elif "texts" in kwds:
            nodes = [TextNode(text=t) for t in kwds["texts"]]
        else:
            raise ValueError("Must provide either `nodes` or `texts`.")

        col_dict = {}
        for field in self._fields:
            try:
                fn_str = self._field_fns[field]
            except KeyError:
                # Same exception type as the bare lookup, but say what is
                # missing instead of surfacing only the field name.
                raise KeyError(
                    f"No extraction function fitted for field {field!r}; "
                    "call `fit` for it before running inference."
                ) from None
            col_dict[field] = self._inference(nodes, fn_str, field)

        df = pd.DataFrame(col_dict, columns=self._fields)

        # convert pd.DataFrame to DataFrameRowsOnly
        # Use itertuples rather than `df.values`: `.values` upcasts
        # mixed-dtype columns to one common dtype (e.g. ints next to a float
        # column become floats), whereas itertuples keeps each column's type.
        df_row_objs = [
            DataFrameRow(row_values=list(row))
            for row in df.itertuples(index=False, name=None)
        ]
        return DataFrameRowsOnly(rows=df_row_objs)
def fit(
    self,
    nodes: List[BaseNode],
    field: str,
    field_context: Optional[Any] = None,
    expected_output: Optional[Any] = None,
    inplace: bool = True,
) -> str:
    """Synthesize the python extraction code for ``field`` from ``nodes``.

    Args:
        nodes: Nodes handed to the extractor to derive the function from.
        field: Name of the field the function should extract.
        field_context: Accepted but not used by this implementation.
        expected_output: Accepted but not used by this implementation.
        inplace: When true, store the synthesized function under ``field``
            in ``self._field_fns``.

    Returns:
        The function returned by the extractor's ``extract_fn_from_nodes``.
    """
    extracted_fn = self._extractor.extract_fn_from_nodes(nodes, field)
    logger.debug(f"Extracted function: {extracted_fn}")

    # Nothing to record when the caller only wants the function back.
    if not inplace:
        return extracted_fn

    self._field_fns[field] = extracted_fn
    return extracted_fn