subashpoudel's picture
Refined embedding loader
b4fb6ac
raw
history blame
1.86 kB
from langchain_core.tools import tool
from .state import QueryFormatter
import pandas as pd
import numpy as np
import ast
import faiss
from utils.models_loader import ST
from utils.load_embeddings import embeddings , index
@tool(
    "influencers_data_retrieval_tool",
    args_schema=QueryFormatter,
    return_direct=False,
    description="Retrieve influencer-related data for a given query.",
)
def retrieve_tool(business_details):
    """
    Always invoke this tool.
    Retrieve influencer's data by semantic search of **business details**.

    Args:
        business_details: The business description to search with. It is
            converted with ``str()`` and encoded by ``ST`` into the query
            vector.

    Returns:
        ``str()`` of a list holding one inner list per hit. Each inner list
        contains four formatted strings: a header (rank, username, likes,
        comments), the story, the visible branding, and the product or
        service details. Hits are ordered as returned by ``index.search``.
    """
    # === Load CSV ===
    # NOTE(review): the CSV is re-read on every call, and its row positions
    # are assumed to line up with the vectors stored in `index` -- confirm
    # against utils.load_embeddings.
    csv_path = 'extracted_data.csv'
    df = pd.read_csv(csv_path)

    # The index is queried with a single float32 row vector.
    query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
    top_k = 7
    _distances, indices = index.search(query_embedding, top_k)

    # === Format results ===
    outer_list = []
    for rank, idx in enumerate(indices[0], start=1):
        # FAISS pads the label array with -1 when fewer than top_k neighbours
        # exist. A negative position would make `iloc` silently return a row
        # counted from the end of the frame, i.e. an unrelated influencer.
        if idx < 0:
            continue

        # Fetch the row once instead of re-indexing the frame per field.
        row = df.iloc[idx]
        outer_list.append([
            f"[{rank}]. The influencer name is: **{row['username']}** — Likes: **{row['likesCount']}**, Comments: **{row['commentCount']}**",
            f"The story of that particular video is:\n{row['story']}",
            f"The branding or promotion done is:\n{row['visible_texts_or_brandings']}",
            f"The details of product or service is:\n{row['product_or_service_details']}",
        ])
    return str(outer_list)