Spaces:
Sleeping
Sleeping
| # from utils.data_loader import load_dataset | |
| import pandas as pd | |
| from utils.data_loader import load_influencer_data | |
def save_to_db(business_details):
    """Filter the influencer dataset by business details and write it to CSV.

    Each string value in ``business_details``, and each element of each list
    value (converted with ``str``), becomes a lowercase search term. A row of
    the dataset is kept when the string form of any of its cells contains any
    search term, compared case-insensitively. The kept rows are written to
    ``extracted_data.csv`` in the current working directory and a
    confirmation message is printed.

    Args:
        business_details: Mapping whose values are strings or lists. Values
            of any other type are ignored.

    Returns:
        None.
    """
    df = pd.DataFrame(load_influencer_data())

    # Flatten every business detail value into a set of lowercase terms.
    search_terms = set()
    for value in business_details.values():
        if isinstance(value, str):
            candidates = [value]
        elif isinstance(value, list):
            candidates = [str(item) for item in value]
        else:
            continue
        # Skip blank terms: an empty string is a substring of every cell, so
        # a single empty field would otherwise select the whole dataset.
        search_terms.update(
            text.lower() for text in candidates if text.strip()
        )

    def row_matches(row):
        """Return True when any cell of ``row`` contains any search term."""
        # Lowercase each cell once instead of once per search term.
        lowered_cells = [str(cell).lower() for cell in row]
        return any(
            term in cell
            for cell in lowered_cells
            for term in search_terms
        )

    # Keep only the rows where at least one cell contains a search term.
    matched_df = df[df.apply(row_matches, axis=1)]
    matched_df.to_csv('extracted_data.csv')
    print('Dataset updated according to business')