|
|
from langchain_groq import ChatGroq |
|
|
from pydantic import BaseModel, Field |
|
|
from dotenv import load_dotenv |
|
|
load_dotenv() |
|
|
import os |
|
|
import numpy as np |
|
|
from langchain_core.tools import tool |
|
|
from utils.data_loader import load_influencer_data |
|
|
from utils.models_loader import ST , llm |
|
|
import numpy as np |
|
|
from langchain_core.messages import SystemMessage |
|
|
import re |
|
|
import faiss |
|
|
import ast |
|
|
import pandas as pd |
|
|
from .state import QueryFormatter |
|
|
|
|
|
# Re-export GROQ_API_KEY into the process environment after load_dotenv().
# os.environ only accepts str values, so assigning the None that os.getenv()
# returns for an unset variable raises TypeError at import time. Only assign
# when a value is actually present; a missing key is then left for the Groq
# client to report when it is first used.
_groq_api_key = os.getenv('GROQ_API_KEY')
if _groq_api_key is not None:
    os.environ['GROQ_API_KEY'] = _groq_api_key
|
|
# Cache of the parsed influencer table and its FAISS index, keyed by CSV path.
# Each entry is (mtime, DataFrame, index-or-None). Storing the file's mtime
# lets an edited CSV be picked up again without restarting the process.
_INFLUENCER_INDEX_CACHE = {}


def _load_influencer_index(csv_path):
    """Return ``(df, index)`` for *csv_path*, rebuilding only when the file changes.

    ``index`` is a ``faiss.IndexFlatL2`` built over the ``embeddings`` column,
    or ``None`` when the CSV has no rows (there is nothing to index).
    """
    modified_at = os.path.getmtime(csv_path)
    cached = _INFLUENCER_INDEX_CACHE.get(csv_path)
    if cached is not None and cached[0] == modified_at:
        return cached[1], cached[2]

    df = pd.read_csv(csv_path)
    index = None
    if not df.empty:
        # Embeddings are stored in the CSV as stringified Python lists;
        # literal_eval turns them back into lists without executing code.
        df['embeddings'] = df['embeddings'].apply(
            lambda x: ast.literal_eval(x) if isinstance(x, str) else x
        )
        embeddings = np.vstack(df['embeddings'].values).astype('float32')
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

    _INFLUENCER_INDEX_CACHE[csv_path] = (modified_at, df, index)
    return df, index


# NOTE(review): the tool name contains an apostrophe and a space. Some
# function-calling APIs restrict tool names to [a-zA-Z0-9_-]; the name is kept
# unchanged here because callers may reference it - confirm with the provider.
@tool("influencer's data-retrieval-tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
def retrieve_tool(video_topic):
    '''
    Always invoke this tool.
    Retrieve influencer's data by semantic search of **video topic**.

    The topic is embedded with ``ST`` and matched (L2 distance) against the
    pre-computed embeddings in ``extracted_data.csv``, which is resolved
    relative to the current working directory.

    Args:
        video_topic: Topic to search for; it is converted with ``str()``
            before being embedded.

    Returns:
        ``str()`` of a list with one entry per hit (at most 7, best first).
        Each entry is a list of three strings: a headline with rank,
        username, likes and comments; the video story; and the visible
        text / branding. An empty CSV yields ``"[]"``.

    Raises:
        OSError: If the CSV file cannot be read.
    '''
    csv_path = 'extracted_data.csv'
    df, index = _load_influencer_index(csv_path)
    if index is None:
        return str([])

    query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')

    # Never ask for more neighbours than there are rows: FAISS pads missing
    # results with label -1, which df.iloc would read as "the last row".
    top_k = min(7, len(df))
    _distances, indices = index.search(query_embedding, top_k)

    # The branding text previously repeated the story column (copy-paste slip).
    # Use the dedicated column when the CSV provides it; otherwise fall back
    # to the story so the output is unchanged for files without that column.
    branding_column = 'visible_text_or_brandings'
    if branding_column not in df.columns:
        branding_column = 'story'

    outer_list = []
    for rank, idx in enumerate(indices[0], start=1):
        if idx < 0:
            # Defensive: a padded "no result" label, not a real row.
            continue
        row = df.iloc[idx]
        outer_list.append([
            f"[{rank}]. The influencer name is: **{row['username']}** — Likes: **{row['likesCount']}**, Comments: **{row['commentCount']}**",
            f"The story of that particular video is:\n{row['story']}",
            f"The branding or promotion done is:\n{row[branding_column]}",
        ])

    return str(outer_list)
|
|
|
|
|
|