subashpoudel's picture
Included CI CD
583f6dd
raw
history blame
1 kB
# from utils.data_loader import load_dataset
import pandas as pd
from utils.data_loader import load_influencer_data
def _collect_search_terms(business_details):
    """Return the set of lowercase search terms held in *business_details*.

    String values are used directly and list values contribute one term per
    element (each element is converted with ``str``). Values of any other
    type are ignored. A missing (``None``) mapping yields no terms.
    """
    terms = set()
    for value in (business_details or {}).values():
        if isinstance(value, str):
            candidates = [value]
        elif isinstance(value, list):
            # Skip None entries so they do not become the literal term "none".
            candidates = [str(item) for item in value if item is not None]
        else:
            continue
        # A blank term is a substring of every cell and would therefore
        # select the whole dataset, so blank candidates are discarded.
        terms.update(text.lower() for text in candidates if text.strip())
    return terms


def _row_contains_any(row, terms):
    """Return True if any cell of *row* contains any of *terms* (case-insensitive)."""
    for cell in row:
        # Lowercase each cell once instead of once per search term.
        text = str(cell).lower()
        if any(term in text for term in terms):
            return True
    return False


def save_to_db(business_details):
    """Filter the influencer dataset by *business_details* and write it to CSV.

    Every row in which at least one cell contains at least one of the
    business-detail values (case-insensitive substring match) is kept. The
    result is written to ``extracted_data.csv`` in the current working
    directory; despite the function's name, no database is involved.

    Args:
        business_details: Mapping whose values are strings or lists of
            values to search for. Other value types are ignored.
            ``None`` or an empty mapping produces an empty result.

    Returns:
        None. The function's effects are the CSV file and a status line
        printed to stdout.
    """
    # NOTE(review): load_influencer_data() is assumed to return something
    # pd.DataFrame() accepts (records, dict of columns, or a DataFrame).
    df = pd.DataFrame(load_influencer_data())
    terms = _collect_search_terms(business_details)

    if df.empty or not terms:
        # Nothing can match. Slicing keeps the column header intact, whereas
        # a row-wise apply() on an empty frame does not yield a boolean mask.
        matched_df = df.iloc[0:0]
    else:
        mask = df.apply(_row_contains_any, axis=1, args=(terms,))
        matched_df = df[mask]

    matched_df.to_csv('extracted_data.csv')
    print('Dataset updated according to business')