# Refined chatbot — subashpoudel (commit 6f57d05)
import faiss
import ast
import pandas as pd
import numpy as np
from src.genai.utils.data_loader import caption_df, caption_index
from src.genai.utils.models_loader import embedding_model , encoding_model
from src.genai.utils.utils import clean_text
import tiktoken
class InfluencerRetrievalTool:
    """Tool for retrieving influencer data based on semantic search.

    Wraps a FAISS index over caption embeddings (``caption_index``) that is
    aligned row-for-row with the caption dataframe (``caption_df``): a FAISS
    result index ``i`` maps to ``caption_df.iloc[i]``.
    """

    def __init__(self):
        # Module-level singletons loaded once by the data_loader at import time.
        self.df = caption_df
        self.index = caption_index

    def retrieve_for_analytics(self, business_details):
        """Return the top-10 influencer records most similar to *business_details*.

        Parameters
        ----------
        business_details : Any
            Free-form business description; stringified before embedding.

        Returns
        -------
        list[dict]
            One dict per hit with keys ``url``, ``username``, ``likesCount``,
            ``commentCount`` (counts are ``int`` or ``None`` when missing).
        """
        print('Generating embeddings..')
        query_embedding = np.array(embedding_model.embed_query(str(business_details))).reshape(1, -1).astype('float32')
        print('Embeddings generated')
        # NOTE(review): unlike retrieve_for_orchestration, the query is NOT
        # L2-normalized here — confirm this matches how the index was built.
        distances, indices = self.index.search(query_embedding, 10)
        results = []
        for idx in indices[0]:
            # FAISS pads with -1 when fewer than k vectors exist; iloc[-1]
            # would silently return the LAST dataframe row, so skip padding.
            if idx < 0:
                continue
            row = self.df.iloc[idx]
            results.append({
                'url': row['videoUrl'],
                'username': row['username'],
                'likesCount': int(row['likesCount']) if pd.notnull(row['likesCount']) else None,
                'commentCount': int(row['commentCount']) if pd.notnull(row['commentCount']) else None
            })
        return results

    def retrieve_for_orchestration(self, query):
        """Return a token-capped textual summary of influencers matching *query*.

        Searches the whole index, keeps hits scoring >= 0.35 (presumably an
        inner-product index over normalized vectors, so scores are cosine
        similarities — TODO confirm), formats each hit, cleans the text and
        truncates the result to 1000 tokens.

        Returns
        -------
        str
            Cleaned, truncated summary, or ``"No influencers found."`` when
            nothing clears the threshold.
        """
        query_embedding = np.array(embedding_model.embed_query(str(query))).reshape(1, -1).astype('float32')
        print('Embeddings Generated')
        faiss.normalize_L2(query_embedding)
        print('Query embedded')
        distances, indices = self.index.search(query_embedding, len(self.df))
        similarity_threshold = 0.35
        # Drop FAISS's -1 padding entries as well as low-similarity hits;
        # a -1 passed to iloc would silently select the last dataframe row.
        selected = [(idx, sim) for idx, sim in zip(indices[0], distances[0])
                    if idx >= 0 and sim >= similarity_threshold]
        if not selected:
            return "No influencers found."
        outer_list = []
        for rank, (idx, sim) in enumerate(selected, 1):
            row = self.df.iloc[idx]
            inner_list = [
                f"[{rank}]. The influencer name is: **{row['username']}** — Likes: **{row['likesCount']}**, Comments: **{row['commentCount']}**",
                f"The branding or promotion done is:\n{row['visible_texts_or_brandings']}",
                f"The details of product or service is:\n{row['product_or_service_details']}"
            ]
            outer_list.append(inner_list)
        cleaned_response = clean_text(str(outer_list))
        print('response cleaned')
        # Hard cap the orchestration context at 1000 tokens.
        tokens = encoding_model.encode(cleaned_response)[:1000]
        print('tokens got')
        return encoding_model.decode(tokens)