Spaces:

subashdvorak
/

trygithubactions

Sleeping

App Files Files Community

trygithubactions / src /genai /brainstroming_agent /utils /tools.py

subashpoudel

Fixed importing errors

d98138c 5 months ago

raw

history blame contribute delete

2.06 kB


	import os
	import numpy as np
	from src.genai.utils.models_loader import embedding_model
	import numpy as np
	import faiss
	import tiktoken
	from src.genai.utils.data_loader import caption_index , caption_df
	from src.genai.utils.utils import clean_text

	class Retrieval:
	    """Retrieve influencer caption records that match a video topic.

	    The topic is embedded once at construction time with the shared
	    ``embedding_model``; ``influencers_data`` then searches the shared
	    ``caption_index`` with that embedding and renders the matching rows of
	    ``caption_df`` as a token-limited text block.
	    """

	    # Defaults mirror the values that used to be hard-coded in the method.
	    DEFAULT_SIMILARITY_THRESHOLD = 0.35
	    DEFAULT_MAX_TOKENS = 1000
	    NO_RESULTS_MESSAGE = "No influencers found."

	    def __init__(self, video_topic):
	        """Embed ``video_topic`` and store the normalized query vector.

	        Args:
	            video_topic: Topic to search for. It is converted with ``str()``
	                before being embedded.
	        """
	        self.video_topic = video_topic
	        # The search call takes a 2-D float32 array, hence the (1, dim) reshape.
	        self.query_embedding = (
	            np.array(embedding_model.embed_query(str(self.video_topic)))
	            .reshape(1, -1)
	            .astype('float32')
	        )
	        # In-place L2 normalization; presumably so that the scores returned
	        # by the index behave like cosine similarity (confirm index metric).
	        faiss.normalize_L2(self.query_embedding)

	    @staticmethod
	    def _select_hits(indices, scores, similarity_threshold):
	        """Return ``(row_index, score)`` pairs that are valid and similar enough.

	        Negative indices are dropped: FAISS pads result slots it could not
	        fill with ``-1``, and using that value with ``iloc`` would silently
	        pick the last row of the table.
	        """
	        return [
	            (int(idx), float(score))
	            for idx, score in zip(indices, scores)
	            if idx >= 0 and score >= similarity_threshold
	        ]

	    @staticmethod
	    def _format_entry(rank, row):
	        """Render one caption row as the three descriptive lines of an entry."""
	        return [
	            f"[{rank}]. The influencer name is: {row['username']} — Likes: {row['likesCount']}, Comments: {row['commentCount']}",
	            f"The branding or promotion done is:\n{row['visible_texts_or_brandings']}",
	            f"The details of product or service is:\n{row['product_or_service_details']}",
	        ]

	    def influencers_data(
	        self,
	        *,
	        similarity_threshold=DEFAULT_SIMILARITY_THRESHOLD,
	        max_tokens=DEFAULT_MAX_TOKENS,
	    ):
	        """Return a text summary of influencers whose captions match the topic.

	        Args:
	            similarity_threshold: Minimum score a search result must reach to
	                be included. Defaults to 0.35.
	            max_tokens: Maximum number of ``gpt-4o-mini`` tokens kept from the
	                cleaned summary. ``None`` disables trimming. Defaults to 1000.

	        Returns:
	            The cleaned, token-trimmed summary string, or
	            ``"No influencers found."`` when nothing passes the threshold or
	            the caption table is empty.

	        Raises:
	            ValueError: If ``max_tokens`` is negative.
	        """
	        if max_tokens is not None and max_tokens < 0:
	            raise ValueError("max_tokens must be non-negative or None")

	        # Every row is a candidate; the threshold does the actual filtering.
	        top_k = len(caption_df)
	        if top_k == 0:
	            # Nothing to search, and a k of zero is not a valid search size.
	            return self.NO_RESULTS_MESSAGE

	        scores, indices = caption_index.search(self.query_embedding, top_k)
	        # Single query vector, so only the first result row is relevant.
	        hits = self._select_hits(indices[0], scores[0], similarity_threshold)
	        if not hits:
	            return self.NO_RESULTS_MESSAGE

	        entries = [
	            self._format_entry(rank, caption_df.iloc[idx])
	            for rank, (idx, _score) in enumerate(hits, 1)
	        ]

	        # The nested list is stringified as a whole before cleaning, which is
	        # the format downstream consumers already receive.
	        cleaned_response = clean_text(str(entries))
	        encoding = tiktoken.encoding_for_model('gpt-4o-mini')
	        tokens = encoding.encode(cleaned_response)
	        return encoding.decode(tokens[:max_tokens])