import ast
import json

import faiss
import numpy as np
import pandas as pd

from src.genai.utils.models_loader import ST

# CSV holding one row per influencer post; read relative to the current
# working directory on every call.
_CSV_PATH = 'extracted_data.csv'

# Maximum number of nearest neighbours requested from the index.
_TOP_K = 15


def retrieve_tool(business_details):
    """Always invoke this tool. Retrieve influencer's data by semantic search of **business details**.

    The CSV at ``extracted_data.csv`` is loaded, its ``embeddings`` column is
    parsed into a float32 matrix, a flat L2 FAISS index is built over it, and
    the stringified ``business_details`` is encoded with ``ST`` and used as
    the query.

    Args:
        business_details: Any object describing the business; it is converted
            with ``str()`` before being encoded.

    Returns:
        A list of at most 15 dicts, in the order returned by the index
        search, each with the keys ``username``, ``likesCount`` (int) and
        ``commentCount`` (int). The list is empty when the CSV has no rows.
    """
    # === Load CSV ===
    df = pd.read_csv(_CSV_PATH)
    if df.empty:
        # Nothing to index; np.vstack would otherwise fail on an empty input.
        return []

    # === Parse stored embeddings ===
    # Embeddings are stored as their textual list representation; anything
    # that is not a string is passed through unchanged.
    parsed = df['embeddings'].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )
    embeddings = np.vstack(parsed.values).astype('float32')

    # === Build FAISS index ===
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # === Encode the query and search ===
    # NOTE(review): assumes ST.encode returns a 1-D numpy vector whose length
    # matches the stored embedding dimension - confirm against models_loader.
    query_embedding = (
        ST.encode(str(business_details)).reshape(1, -1).astype('float32')
    )
    # Never ask for more neighbours than there are rows: FAISS pads missing
    # neighbours with label -1, which df.iloc would treat as "last row".
    top_k = min(_TOP_K, len(df))
    _, indices = index.search(query_embedding, top_k)

    # === Format results ===
    results = []
    for idx in indices[0]:
        if idx < 0:
            # Defensive: skip FAISS "no neighbour" placeholders.
            continue
        row = df.iloc[idx]
        results.append({
            'username': row['username'],
            'likesCount': int(row['likesCount']),
            'commentCount': int(row['commentCount']),
        })
    return results


details = {
    "business_type": "fitness and gym",
    "platform": "Instagram, TikTok",
    "target_audience": "young Nepali adults (ages 18–40) who are health-conscious and active on social media",
    "business_goals": "to expand gym branches across all major cities of Nepal and build a strong fitness community",
    "offerings": "personal training, group fitness classes, modern workout equipment, nutrition guidance, and wellness programs",
    "Challenges_faced": "attracting loyal members, standing out in a competitive market, and promoting consistent engagement",
}

# NOTE(review): this runs at import time, so importing the module loads the
# CSV and performs a search. Consider moving it under
# `if __name__ == "__main__":` once it is confirmed that no caller relies on
# the import-time output.
print(retrieve_tool(details))