Spaces:
Sleeping
Sleeping
| from langchain_groq import ChatGroq | |
| from pydantic import BaseModel, Field | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| import os | |
| import numpy as np | |
| from langchain_core.tools import tool | |
| from utils.data_loader import load_influencer_data | |
| from utils.models_loader import ST , llm | |
| import numpy as np | |
| from langchain_core.messages import SystemMessage | |
| import re | |
| import faiss | |
| import ast | |
| import pandas as pd | |
| from .state import QueryFormatter | |
# Re-export the Groq key into the process environment (load_dotenv() above has
# normally populated it already). os.environ only accepts str values, so
# assigning the None that os.getenv returns for a missing variable would raise
# TypeError at import time; skip the assignment in that case and let the Groq
# client report the missing key itself.
_groq_api_key = os.getenv('GROQ_API_KEY')
if _groq_api_key is not None:
    os.environ['GROQ_API_KEY'] = _groq_api_key
# Semantic search over the influencer CSV.
#
# video_topic: anything convertible with str(); it is embedded with the shared
#   SentenceTransformer (ST) and compared (L2 distance) against the embeddings
#   stored in 'extracted_data.csv'.
# Returns: str() of a list with one entry per hit (at most 7, nearest first).
#   Each entry is a list of three formatted strings: headline with username /
#   likes / comments, the video story, and the branding text. Returns "[]"
#   when the CSV has no rows.
#
# NOTE: the docstring is kept verbatim on purpose -- if this function is
# registered as an LLM tool, the docstring is the tool description the model
# sees, so documentation for maintainers lives in comments instead.
def retrieve_tool(video_topic):
    '''
    Always invoke this tool.
    Retrieve influencer's data by semantic search of **video topic**.
    '''
    # === Load CSV ===
    csv_path = 'extracted_data.csv'
    df = pd.read_csv(csv_path)
    if df.empty:
        # np.vstack would raise on an empty sequence; nothing to search anyway.
        return str([])

    # === Parse stored embeddings ===
    # Embeddings are stored as stringified Python lists; literal_eval is a safe
    # parser (no code execution). Non-string cells are passed through as-is.
    parsed_embeddings = [
        ast.literal_eval(cell) if isinstance(cell, str) else cell
        for cell in df['embeddings']
    ]
    embeddings = np.vstack(parsed_embeddings).astype('float32')

    # === Build FAISS index ===
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # === Encode the query and search ===
    query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')
    # Never ask for more neighbours than there are rows: FAISS pads missing
    # results with -1, and df.iloc[-1] would silently return the LAST row.
    top_k = min(7, len(df))
    _distances, indices = index.search(query_embedding, top_k)

    # === Format results ===
    # The branding text was previously copied from the 'story' column. Use the
    # dedicated column when the CSV provides it; otherwise keep the old
    # behaviour so a CSV without that column still works.
    # NOTE(review): confirm the CSV really has 'visible_text_or_brandings'.
    has_branding_column = 'visible_text_or_brandings' in df.columns

    outer_list = []
    for rank, idx in enumerate(indices[0], start=1):
        if idx < 0:
            # -1 marks "no result"; such padding only appears at the tail.
            break
        row = df.iloc[idx]
        story = row['story']
        branding = row['visible_text_or_brandings'] if has_branding_column else story
        outer_list.append([
            f"[{rank}]. The influencer name is: **{row['username']}** — Likes: **{row['likesCount']}**, Comments: **{row['commentCount']}**",
            f"The story of that particular video is:\n{story}",
            f"The branding or promotion done is:\n{branding}",
        ])
    return str(outer_list)