Spaces:

subashdvorak
/

trygithubactions

Sleeping

App Files Files Community

trygithubactions / ideation_agent /utils /tools.py

subashpoudel

Refined embedding loader

b4fb6ac 7 months ago

raw

history blame

1.86 kB


	from langchain_core.tools import tool
	from .state import QueryFormatter
	import pandas as pd
	import numpy as np
	import ast
	import faiss
	from utils.models_loader import ST
	from utils.load_embeddings import embeddings , index

	@tool("influencers_data_retrieval_tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
	def retrieve_tool(business_details) -> str:
	    '''
	    Always invoke this tool.
	    Retrieve influencer's data by semantic search of business details.

	    The business details are converted with ``str()``, embedded with ``ST``,
	    and used to query the module-level ``index`` for the 7 nearest entries.
	    Each returned position is looked up in ``extracted_data.csv``.
	    NOTE(review): this assumes the CSV rows are in the same order as the
	    vectors stored in ``index`` -- confirm against the embedding loader.

	    Args:
	        business_details: Description of the business to search for.

	    Returns:
	        ``str()`` of a list holding one inner list per hit. Each inner list
	        has four text lines: rank/username/likes/comments, the story, the
	        visible branding, and the product or service details.
	    '''
	    top_k = 7  # number of nearest neighbours requested from the index

	    # === Load CSV ===
	    # Re-read on every call, so edits to the file are picked up immediately.
	    df = pd.read_csv('extracted_data.csv')

	    # The index is searched with a single-row float32 matrix.
	    query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
	    _distances, indices = index.search(query_embedding, top_k)

	    # === Format results ===
	    outer_list = []
	    for rank, idx in enumerate(indices[0], start=1):
	        # FAISS pads the result with -1 when fewer than top_k neighbours
	        # exist; df.iloc[-1] would silently return the last CSV row, so
	        # such placeholders are skipped instead of being reported as hits.
	        if idx < 0:
	            continue

	        row = df.iloc[idx]  # fetch the row once rather than once per column
	        outer_list.append([
	            f"[{rank}]. The influencer name is: {row['username']} — Likes: {row['likesCount']}, Comments: {row['commentCount']}",
	            f"The story of that particular video is:\n{row['story']}",
	            f"The branding or promotion done is:\n{row['visible_texts_or_brandings']}",
	            f"The details of product or service is:\n{row['product_or_service_details']}",
	        ])

	    return str(outer_list)