Spaces:
Sleeping
Sleeping
| import faiss | |
| import ast | |
| import pandas as pd | |
| import numpy as np | |
| from src.genai.utils.models_loader import ST | |
| import json | |
def retrieve_tool(business_details):
    """
    Always invoke this tool.
    Retrieve influencer's data by semantic search of **business details**.

    Args:
        business_details: Description of the business. It is converted with
            ``str()`` before being embedded, so any object is accepted.

    Returns:
        A list of at most 15 dicts, in the order returned by the FAISS
        search, each with the keys ``username``, ``likesCount`` and
        ``commentCount``. The list is empty when the CSV has no rows.
    """
    # NOTE(review): the first two docstring sentences look like an LLM tool
    # description and are kept verbatim -- confirm before rewording them.

    # === Load CSV ===
    csv_path = 'extracted_data.csv'
    df = pd.read_csv(csv_path)
    if df.empty:
        # Nothing to index; np.vstack would raise on an empty sequence.
        return []

    # === Parse stored embeddings ===
    # Embeddings are stored as stringified Python lists; literal_eval parses
    # them without executing arbitrary code.
    df['embeddings'] = df['embeddings'].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )
    embeddings = np.vstack(df['embeddings'].values).astype('float32')

    # === Build FAISS index ===
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # === Encode the query and search ===
    query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
    # Never ask for more neighbours than there are rows: FAISS pads missing
    # results with -1, and df.iloc[-1] would silently return the last row.
    top_k = min(15, len(df))
    _, indices = index.search(query_embedding, top_k)

    # === Format results ===
    results = []
    for idx in indices[0]:
        if idx < 0:
            # Defensive: -1 is the FAISS "no result" label.
            continue
        row = df.iloc[idx]
        results.append({
            'username': row['username'],
            'likesCount': int(row['likesCount']),
            'commentCount': int(row['commentCount']),
        })
    return results
# Sample business profile used to exercise retrieve_tool manually.
details = {
    "business_type": "fitness and gym",
    "platform": "Instagram, TikTok",
    "target_audience": "young Nepali adults (ages 18–40) who are health-conscious and active on social media",
    "business_goals": "to expand gym branches across all major cities of Nepal and build a strong fitness community",
    "offerings": "personal training, group fitness classes, modern workout equipment, nutrition guidance, and wellness programs",
    "Challenges_faced": "attracting loyal members, standing out in a competitive market, and promoting consistent engagement",
}

# Run the demo query only when executed as a script, so that importing this
# module does not load the CSV, build the index, or print anything.
if __name__ == "__main__":
    print(retrieve_tool(details))