File size: 1,001 Bytes
93a5bf9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c13e66f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from utils.data_loader import load_dataset
import pandas as pd
from utils.data_loader import load_influencer_data

def save_to_db(business_details: dict) -> None:
    """Filter the influencer dataset by business details and save it as CSV.

    Despite the name, no database is involved: the rows of the influencer
    dataset in which any cell contains any of the supplied business-detail
    values (case-insensitive substring match) are written to
    ``extracted_data.csv`` in the current working directory, and a
    confirmation line is printed.

    Args:
        business_details: Mapping whose values are strings or lists.
            String values and stringified list elements become search
            terms. Values of any other type are ignored. Blank
            (empty or whitespace-only) terms and ``None`` list elements
            are skipped, because a blank term is a substring of every
            cell and would make every row match.

    Returns:
        None. The result is the CSV file written as a side effect.
    """
    df = pd.DataFrame(load_influencer_data())

    # Collect the distinct, lowercased search terms.
    search_terms = set()
    for value in business_details.values():
        if isinstance(value, str):
            candidates = [value]
        elif isinstance(value, list):
            # Skip None so it is not turned into the literal term "none".
            candidates = [str(item) for item in value if item is not None]
        else:
            continue
        search_terms.update(
            term.lower() for term in candidates if term.strip()
        )

    if df.empty or not search_terms:
        # Nothing can match. Slice instead of calling apply(), which does
        # not yield a boolean row mask for an empty frame; the slice keeps
        # the column headers in the CSV.
        matched_df = df.iloc[0:0]
    else:
        def row_matches(row):
            # Lowercase each cell once, then test every term against it.
            cells = [str(cell).lower() for cell in row]
            return any(
                term in cell
                for cell in cells
                for term in search_terms
            )

        matched_df = df[df.apply(row_matches, axis=1)]

    matched_df.to_csv('extracted_data.csv')
    print('Dataset updated according to business')