Spaces:
Sleeping
Sleeping
File size: 1,871 Bytes
6874dac 583f6dd 6874dac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import faiss
import ast
import pandas as pd
import numpy as np
from src.genai.utils.models_loader import ST
import json
def retrieve_tool(business_details):
    '''
    Always invoke this tool.
    Retrieve influencer's data by semantic search of **business details**.
    '''
    # NOTE(review): the docstring above reads like an LLM tool description and
    # may be consumed at runtime by an agent framework, so its wording is kept
    # verbatim. Developer-facing documentation lives in these comments instead.
    #
    # Parameters
    #   business_details: any object; it is converted with ``str()`` and the
    #       resulting text is embedded as the search query.
    # Returns
    #   A list of at most 15 dicts with keys ``username``, ``likesCount`` and
    #   ``commentCount`` (the counts are cast to ``int``), in the order FAISS
    #   returns the hits. Returns an empty list when the CSV has no rows.
    # Data dependency
    #   Reads ``extracted_data.csv`` from the current working directory; it
    #   must contain the columns ``embeddings``, ``username``, ``likesCount``
    #   and ``commentCount``.
    # NOTE(review): the CSV is re-read and the index rebuilt on every call;
    # consider caching if this sits on a hot path.

    # === Load CSV ===
    csv_path = 'extracted_data.csv'
    df = pd.read_csv(csv_path)
    if df.empty:
        # Nothing to index; np.vstack would raise on an empty sequence.
        return []

    # === Parse stored embeddings ===
    # Embeddings are stored as the text form of a Python list; literal_eval
    # parses them without executing arbitrary code.
    df['embeddings'] = df['embeddings'].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )
    embeddings = np.vstack(df['embeddings'].values).astype('float32')

    # === Build FAISS index ===
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # === Encode the query and search ===
    # presumably ST is a sentence-embedding model whose encode() returns a
    # 1-D numpy vector of the same dimension as the stored embeddings -- confirm.
    query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
    # Never ask for more neighbours than there are vectors: FAISS pads missing
    # results with label -1, and df.iloc[-1] would silently return the LAST
    # row instead of failing, yielding duplicate bogus matches.
    top_k = min(15, embeddings.shape[0])
    _, indices = index.search(query_embedding, top_k)

    # === Format results ===
    results = []
    for idx in indices[0]:
        if idx < 0:
            # Defensive: -1 is FAISS's "no result" label.
            continue
        row = df.iloc[idx]
        results.append({
            'username': row['username'],
            'likesCount': int(row['likesCount']),
            'commentCount': int(row['commentCount']),
        })
    return results
# Sample business profile used to exercise retrieve_tool by hand.
# NOTE(review): this runs at import time as well as when the file is executed
# as a script -- confirm that is intended before importing this module elsewhere.
details = dict(
    business_type="fitness and gym",
    platform="Instagram, TikTok",
    target_audience="young Nepali adults (ages 18–40) who are health-conscious and active on social media",
    business_goals="to expand gym branches across all major cities of Nepal and build a strong fitness community",
    offerings="personal training, group fitness classes, modern workout equipment, nutrition guidance, and wellness programs",
    Challenges_faced="attracting loyal members, standing out in a competitive market, and promoting consistent engagement",
)
# Print the retrieved influencer records for the sample profile.
print(retrieve_tool(details))
|