| | import pandas as pd |
| | import re |
| | from synthesis_qa_backend import ResearchSynthesizer |
| | from config import API_KEY, INDEX_PATH, METADATA_PATH, SPECIFIC_COUNTRIES |
| |
|
| | class DataHandler: |
| | def __init__(self): |
| | self.synthesizer = None |
| | self.docs_df = pd.DataFrame() |
| | self.countries_list = [] |
| | self.sectors_list = [] |
| | self.load_data() |
| | |
| | def load_data(self): |
| | """Initialize the research system and load data""" |
| | try: |
| | self.synthesizer = ResearchSynthesizer(INDEX_PATH, METADATA_PATH, API_KEY) |
| | metadata_df = pd.read_csv(METADATA_PATH) |
| | self.docs_df = metadata_df.drop_duplicates(subset=['record_id']) |
| | print(f"✅ Loaded {len(self.docs_df)} unique documents") |
| | |
| | |
| | self.countries_list, self.sectors_list = self._get_unique_values() |
| | |
| | except Exception as e: |
| | print(f"❌ Error loading system: {e}") |
| | self.synthesizer = None |
| | self.docs_df = pd.DataFrame() |
| | |
| | def _get_unique_values(self): |
| | """Get unique values for dropdowns""" |
| | if self.docs_df.empty: |
| | return [], [] |
| | |
| | countries_list = [] |
| | sectors_list = [] |
| | |
| | if 'study_countries' in self.docs_df.columns: |
| | for countries_str in self.docs_df['study_countries'].dropna(): |
| | if pd.isna(countries_str) or str(countries_str).lower() in ['nan', 'none', '']: |
| | continue |
| | countries = [c.strip() for c in str(countries_str).replace(';', ',').split(',')] |
| | filtered = [c for c in countries if c in SPECIFIC_COUNTRIES and len(c) > 1] |
| | countries_list.extend(filtered) |
| | |
| | countries_list = sorted(list(set(countries_list))) |
| | |
| | if 'world_bank_sector' in self.docs_df.columns: |
| | sectors_list = sorted(self.docs_df['world_bank_sector'].dropna().unique().tolist()) |
| | |
| | return countries_list, sectors_list |
| | |
| | def get_data(self): |
| | """Return all data objects""" |
| | return { |
| | 'synthesizer': self.synthesizer, |
| | 'docs_df': self.docs_df, |
| | 'countries_list': self.countries_list, |
| | 'sectors_list': self.sectors_list |
| | } |
| |
|