Spaces:
Sleeping
Sleeping
File size: 2,341 Bytes
be3a5c4 ca75c57 93a5bf9 1ce8b88 3e87e76 6523fd0 be3a5c4 ca75c57 3e87e76 eb40d68 3e87e76 eb40d68 3e87e76 be3a5c4 3e87e76 6178c40 3e87e76 ca75c57 3e87e76 ca75c57 3e87e76 eb40d68 3e87e76 ca75c57 1ce8b88 3e87e76 9e8b261 3e87e76 1ce8b88 3e87e76 9e8b261 3e87e76 1ce8b88 3e87e76 1ce8b88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
from dotenv import load_dotenv
load_dotenv()
import os
import numpy as np
from langchain_core.tools import tool
from utils.data_loader import load_influencer_data
from utils.models_loader import ST , llm
import numpy as np
from langchain_core.messages import SystemMessage
import re
import faiss
import ast
import pandas as pd
from .state import QueryFormatter
# Make sure the Groq key (possibly loaded from .env by load_dotenv()) is present
# in os.environ for libraries that read it from there.
# os.environ only accepts str values: assigning the None that os.getenv() returns
# for an unset variable raises TypeError at import time, so only set the variable
# when a value was actually found.
_groq_api_key = os.getenv('GROQ_API_KEY')
if _groq_api_key is not None:
    os.environ['GROQ_API_KEY'] = _groq_api_key
# NOTE(review): the tool name contains an apostrophe and spaces. Some
# chat-completion APIs restrict function names to [a-zA-Z0-9_-]; confirm the
# provider accepts this name before relying on it. It is left unchanged here
# because callers may reference the tool by this exact name.
@tool("influencer's data-retrieval-tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
def retrieve_tool(video_topic: str) -> str:
    '''
    Always invoke this tool.
    Retrieve influencer's data by semantic search of **video topic**.

    Args:
        video_topic: Free-text topic of the video; it is converted with
            ``str()`` and embedded with the shared ``ST`` encoder.

    Returns:
        ``str()`` of a list with one entry per hit (at most 7, nearest first
        by L2 distance). Each entry is a list of three formatted strings:
        influencer name with like/comment counts, the video story, and the
        visible text / branding. Returns ``"[]"`` when the CSV has no rows.
    '''
    # NOTE(review): the CSV is re-read and the index rebuilt on every call.
    # Caching would be faster but would serve stale data if the file changes,
    # so that behaviour is intentionally left as is.
    # === Load CSV ===
    csv_path = 'extracted_data.csv'
    df = pd.read_csv(csv_path)
    if df.empty:
        # np.vstack raises on an empty sequence; nothing to search anyway.
        return str([])

    # === Parse stored embeddings ===
    # Embeddings are stored as stringified Python lists in the CSV.
    df['embeddings'] = df['embeddings'].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )
    embeddings = np.vstack(df['embeddings'].values).astype('float32')

    # === Build FAISS index ===
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # === Encode the query and search ===
    # Assumes ST.encode returns a 1-D numpy array whose length matches the
    # stored embeddings - TODO confirm against utils.models_loader.
    query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')
    # Never ask for more neighbours than there are rows: FAISS pads missing
    # results with -1, and df.iloc[-1] would silently return the last row.
    top_k = min(7, len(df))
    _, indices = index.search(query_embedding, top_k)

    # === Format results ===
    outer_list = []
    for rank, idx in enumerate(indices[0], start=1):
        if idx < 0:
            # Defensive: -1 marks "no result" in FAISS output.
            continue
        row = df.iloc[idx]
        story = row['story']
        # Previously this field was filled from the 'story' column (copy-paste
        # slip). Read the branding column, falling back to the story when the
        # CSV does not have such a column so existing data files keep working.
        brandings = row.get('visible_text_or_brandings', story)
        outer_list.append([
            f"[{rank}]. The influencer name is: **{row['username']}** — Likes: **{row['likesCount']}**, Comments: **{row['commentCount']}**",
            f"The story of that particular video is:\n{story}",
            f"The branding or promotion done is:\n{brandings}",
        ])
    return str(outer_list)
|