Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from src.genai.utils.data_loader import caption_df | |
| from src.genai.utils.models_loader import llm_gpt | |
| from .prompts import details_extract_prompt | |
| from langchain_core.messages import SystemMessage, HumanMessage | |
| from .state import DetailsFormatter | |
class DetailsExtractorNode:
    """Extract structured details from a set of interactions with a chat model.

    The system prompt comes from ``details_extract_prompt()`` and the model
    output is constrained to the ``DetailsFormatter`` schema.
    """

    def __init__(self, interactions):
        """Bind the chat model and the interactions to analyse.

        Args:
            interactions: Data describing the interactions. It is converted
                with ``str()`` before being sent to the model, so any object
                with a meaningful string form is accepted.
        """
        self.llm = llm_gpt
        self.interactions = interactions

    def run(self):
        """Ask the model for the details and return them.

        Returns:
            The value of ``model_dump()`` called on the structured response
            (presumably a plain dict of the ``DetailsFormatter`` fields --
            confirm against the schema definition).
        """
        messages = [
            SystemMessage(content=details_extract_prompt()),
            HumanMessage(content=str(self.interactions)),
        ]
        # Use the model bound on the instance rather than the module global,
        # so a model assigned to ``self.llm`` is the one actually invoked.
        structured_llm = self.llm.with_structured_output(DetailsFormatter)
        response = structured_llm.invoke(messages)
        return response.model_dump()
class SaveToDB:
    """Filter a captions dataframe by extracted business details and export it.

    Despite the name, the visible code only writes a CSV file; no database
    is involved.
    """

    def __init__(self, caption_df):
        """Keep a copy of the dataframe without its ``embeddings`` column.

        Args:
            caption_df: Source dataframe. The ``embeddings`` column is
                dropped when present and silently ignored when absent.
        """
        self.df = caption_df.drop(columns=['embeddings'], errors='ignore')

    def _prepare_values(self, business_details) -> set:
        """Collect the lowercase search terms found in ``business_details``.

        String values are used directly; items of list, tuple, or set values
        are converted with ``str()`` first. Values of any other type are
        ignored.

        Blank (empty or whitespace-only) terms and ``None`` items are
        skipped: an empty string is a substring of every cell, so keeping
        it would make every row match.

        Args:
            business_details: Mapping of detail names to a string or a
                collection of values.

        Returns:
            A set of non-blank, lowercased search terms.
        """
        all_values = set()
        for v in business_details.values():
            if isinstance(v, str):
                candidates = (v,)
            elif isinstance(v, (list, tuple, set)):
                candidates = (str(item) for item in v if item is not None)
            else:
                continue
            all_values.update(
                text.lower() for text in candidates if text.strip()
            )
        return all_values

    def _row_matches(self, row, all_values) -> bool:
        """Return True if any search term is a substring of any cell in ``row``.

        Cells are compared through their lowercased ``str()`` form, so the
        match is case-insensitive as long as ``all_values`` is lowercase.

        Args:
            row: Iterable of cell values (a dataframe row).
            all_values: Iterable of lowercase search terms.
        """
        for cell in row:
            # Lowercase each cell once instead of once per search term.
            cell_text = str(cell).lower()
            if any(val in cell_text for val in all_values):
                return True
        return False

    def save_to_csv(self, business_details, output_file='extracted_data.csv'):
        """Write the rows matching ``business_details`` to a CSV file.

        A row matches when any search term derived from
        ``business_details`` occurs in any of its cells. When there are no
        usable search terms, or the dataframe has no rows, a header-only
        file is written.

        Args:
            business_details: Mapping of detail names to a string or a
                collection of values.
            output_file: Destination path of the CSV file (written without
                the index).
        """
        all_values = self._prepare_values(business_details)
        if not all_values or self.df.empty:
            # Nothing can match; skip the row scan and keep only the header.
            matched_df = self.df.iloc[0:0]
        else:
            mask = self.df.apply(self._row_matches, axis=1, args=(all_values,))
            matched_df = self.df[mask]
        matched_df.to_csv(output_file, index=False)