Spaces:
Sleeping
Sleeping
Commit
·
b4fb6ac
1
Parent(s):
6874dac
Refined embedding loader
Browse files- __pycache__/main.cpython-312.pyc +0 -0
- brainstroming_agent/utils/__pycache__/tools.cpython-312.pyc +0 -0
- brainstroming_agent/utils/tools.py +4 -34
- ideation_agent/utils/__pycache__/tools.cpython-312.pyc +0 -0
- ideation_agent/utils/tools.py +1 -13
- main.py +1 -0
- orchestration_agent/agent.py +19 -2
- orchestration_agent/utils/nodes.py +2 -0
- orchestration_agent/utils/prompts.py +4 -3
- orchestration_agent/utils/tools.py +54 -14
- orchestration_agent/utils/utils.py +3 -9
- requirements.txt +3 -0
- utils/__pycache__/data_loader.cpython-312.pyc +0 -0
- utils/__pycache__/models_loader.cpython-312.pyc +0 -0
- utils/data_loader.py +1 -2
- utils/load_embeddings.py +18 -0
- utils/models_loader.py +6 -30
__pycache__/main.cpython-312.pyc
CHANGED
|
Binary files a/__pycache__/main.cpython-312.pyc and b/__pycache__/main.cpython-312.pyc differ
|
|
|
brainstroming_agent/utils/__pycache__/tools.cpython-312.pyc
CHANGED
|
Binary files a/brainstroming_agent/utils/__pycache__/tools.cpython-312.pyc and b/brainstroming_agent/utils/__pycache__/tools.cpython-312.pyc differ
|
|
|
brainstroming_agent/utils/tools.py
CHANGED
|
@@ -14,6 +14,7 @@ import faiss
|
|
| 14 |
import ast
|
| 15 |
import pandas as pd
|
| 16 |
from .state import QueryFormatter
|
|
|
|
| 17 |
|
| 18 |
os.environ['GROQ_API_KEY']=os.getenv('GROQ_API_KEY')
|
| 19 |
# @tool("influencers_data_retrieval_tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
|
|
@@ -22,28 +23,12 @@ def retrieve_tool(video_topic):
|
|
| 22 |
Always invoke this tool.
|
| 23 |
Retrieve influencer's data by semantic search of **video topic**.
|
| 24 |
'''
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
df = pd.read_csv(csv_path)
|
| 28 |
-
|
| 29 |
-
# === Parse stored embeddings ===
|
| 30 |
-
df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
|
| 31 |
-
embeddings = np.vstack(df['embeddings'].values).astype('float32')
|
| 32 |
-
|
| 33 |
-
# === Build FAISS index ===
|
| 34 |
-
dimension = embeddings.shape[1]
|
| 35 |
-
index = faiss.IndexFlatL2(dimension)
|
| 36 |
-
index.add(embeddings)
|
| 37 |
-
|
| 38 |
-
# === Load SentenceTransformer model ===
|
| 39 |
-
|
| 40 |
-
# === Encode the query and search ===
|
| 41 |
query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')
|
| 42 |
top_k=10
|
| 43 |
distances, indices = index.search(query_embedding, top_k)
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
# === Format results ===
|
| 48 |
outer_list = []
|
| 49 |
for i, idx in enumerate(indices[0]):
|
|
@@ -70,22 +55,7 @@ def retrieve_manual(video_topic):
|
|
| 70 |
Always invoke this tool.
|
| 71 |
Retrieve influencer's data by semantic search of **video topic**.
|
| 72 |
'''
|
| 73 |
-
|
| 74 |
-
csv_path = 'extracted_data.csv'
|
| 75 |
-
df = pd.read_csv(csv_path)
|
| 76 |
-
|
| 77 |
-
# === Parse stored embeddings ===
|
| 78 |
-
df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
|
| 79 |
-
embeddings = np.vstack(df['embeddings'].values).astype('float32')
|
| 80 |
-
|
| 81 |
-
# === Build FAISS index ===
|
| 82 |
-
dimension = embeddings.shape[1]
|
| 83 |
-
index = faiss.IndexFlatL2(dimension)
|
| 84 |
-
index.add(embeddings)
|
| 85 |
-
|
| 86 |
-
# === Load SentenceTransformer model ===
|
| 87 |
-
|
| 88 |
-
# === Encode the query and search ===
|
| 89 |
query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')
|
| 90 |
top_k=5
|
| 91 |
distances, indices = index.search(query_embedding, top_k)
|
|
|
|
| 14 |
import ast
|
| 15 |
import pandas as pd
|
| 16 |
from .state import QueryFormatter
|
| 17 |
+
from utils.load_embeddings import embeddings , index
|
| 18 |
|
| 19 |
os.environ['GROQ_API_KEY']=os.getenv('GROQ_API_KEY')
|
| 20 |
# @tool("influencers_data_retrieval_tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
|
|
|
|
| 23 |
Always invoke this tool.
|
| 24 |
Retrieve influencer's data by semantic search of **video topic**.
|
| 25 |
'''
|
| 26 |
+
df = pd.read_csv('extracted_data.csv')
|
| 27 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')
|
| 29 |
top_k=10
|
| 30 |
distances, indices = index.search(query_embedding, top_k)
|
| 31 |
|
|
|
|
|
|
|
| 32 |
# === Format results ===
|
| 33 |
outer_list = []
|
| 34 |
for i, idx in enumerate(indices[0]):
|
|
|
|
| 55 |
Always invoke this tool.
|
| 56 |
Retrieve influencer's data by semantic search of **video topic**.
|
| 57 |
'''
|
| 58 |
+
df = pd.read_csv('extracted_data.csv')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')
|
| 60 |
top_k=5
|
| 61 |
distances, indices = index.search(query_embedding, top_k)
|
ideation_agent/utils/__pycache__/tools.cpython-312.pyc
CHANGED
|
Binary files a/ideation_agent/utils/__pycache__/tools.cpython-312.pyc and b/ideation_agent/utils/__pycache__/tools.cpython-312.pyc differ
|
|
|
ideation_agent/utils/tools.py
CHANGED
|
@@ -6,6 +6,7 @@ import numpy as np
|
|
| 6 |
import ast
|
| 7 |
import faiss
|
| 8 |
from utils.models_loader import ST
|
|
|
|
| 9 |
|
| 10 |
@tool("influencers_data_retrieval_tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
|
| 11 |
def retrieve_tool(business_details):
|
|
@@ -17,24 +18,11 @@ def retrieve_tool(business_details):
|
|
| 17 |
csv_path = 'extracted_data.csv'
|
| 18 |
df = pd.read_csv(csv_path)
|
| 19 |
|
| 20 |
-
# === Parse stored embeddings ===
|
| 21 |
-
df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
|
| 22 |
-
embeddings = np.vstack(df['embeddings'].values).astype('float32')
|
| 23 |
|
| 24 |
-
# === Build FAISS index ===
|
| 25 |
-
dimension = embeddings.shape[1]
|
| 26 |
-
index = faiss.IndexFlatL2(dimension)
|
| 27 |
-
index.add(embeddings)
|
| 28 |
-
|
| 29 |
-
# === Load SentenceTransformer model ===
|
| 30 |
-
|
| 31 |
-
# === Encode the query and search ===
|
| 32 |
query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
|
| 33 |
top_k=7
|
| 34 |
distances, indices = index.search(query_embedding, top_k)
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
# === Format results ===
|
| 39 |
outer_list = []
|
| 40 |
for i, idx in enumerate(indices[0]):
|
|
|
|
| 6 |
import ast
|
| 7 |
import faiss
|
| 8 |
from utils.models_loader import ST
|
| 9 |
+
from utils.load_embeddings import embeddings , index
|
| 10 |
|
| 11 |
@tool("influencers_data_retrieval_tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
|
| 12 |
def retrieve_tool(business_details):
|
|
|
|
| 18 |
csv_path = 'extracted_data.csv'
|
| 19 |
df = pd.read_csv(csv_path)
|
| 20 |
|
|
|
|
|
|
|
|
|
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
|
| 23 |
top_k=7
|
| 24 |
distances, indices = index.search(query_embedding, top_k)
|
| 25 |
|
|
|
|
|
|
|
| 26 |
# === Format results ===
|
| 27 |
outer_list = []
|
| 28 |
for i, idx in enumerate(indices[0]):
|
main.py
CHANGED
|
@@ -163,6 +163,7 @@ def generate_final_story_endpoint():
|
|
| 163 |
return {
|
| 164 |
'response': final_story
|
| 165 |
}
|
|
|
|
| 166 |
# stored_data['final_story']= '''A cinematic journey follows a street magician\'s
|
| 167 |
# metamorphosis from a mere trickster to a powerful performer, as he transforms his act with newfound physical strength, effortlessly executing death-defying stunts, and inspiring a captivated crowd to take action, all set against a
|
| 168 |
# backdrop of urban grandeur and pulsing energy.'''
|
|
|
|
| 163 |
return {
|
| 164 |
'response': final_story
|
| 165 |
}
|
| 166 |
+
|
| 167 |
# stored_data['final_story']= '''A cinematic journey follows a street magician\'s
|
| 168 |
# metamorphosis from a mere trickster to a powerful performer, as he transforms his act with newfound physical strength, effortlessly executing death-defying stunts, and inspiring a captivated crowd to take action, all set against a
|
| 169 |
# backdrop of urban grandeur and pulsing energy.'''
|
orchestration_agent/agent.py
CHANGED
|
@@ -4,6 +4,7 @@ from .utils.nodes import tool_return_node, extract_user_reference_node
|
|
| 4 |
from utils.models_loader import llm_gpt
|
| 5 |
from .utils.state import ValidationFormatter
|
| 6 |
from .utils.utils import caption_image , extract_latest_response_block
|
|
|
|
| 7 |
|
| 8 |
import re
|
| 9 |
from langchain_core.messages import SystemMessage
|
|
@@ -20,17 +21,33 @@ def orchestration_graph():
|
|
| 20 |
workflow.add_edge('chatbot2', END)
|
| 21 |
return workflow.compile(checkpointer=memory)
|
| 22 |
|
|
|
|
| 23 |
def orchestration_chat(user_input: str, image_base64=[]):
|
|
|
|
|
|
|
|
|
|
| 24 |
if len(image_base64)>0:
|
| 25 |
caption_response = caption_image(image_base64, user_input)
|
|
|
|
|
|
|
| 26 |
print('Caption Response:', caption_response)
|
| 27 |
else:
|
| 28 |
caption_response =''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
agent = orchestration_graph()
|
| 31 |
config = {"configurable": {"thread_id": "orchestration-thread"}}
|
| 32 |
-
response = agent.invoke({"messages": [{'role':'human','content':user_input},
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
response=llm_gpt.with_structured_output(ValidationFormatter).invoke(extract_latest_response_block(response))
|
| 35 |
return response
|
| 36 |
|
|
|
|
| 4 |
from utils.models_loader import llm_gpt
|
| 5 |
from .utils.state import ValidationFormatter
|
| 6 |
from .utils.utils import caption_image , extract_latest_response_block
|
| 7 |
+
from .utils.tools import retrieve_data_for_orchestration
|
| 8 |
|
| 9 |
import re
|
| 10 |
from langchain_core.messages import SystemMessage
|
|
|
|
| 21 |
workflow.add_edge('chatbot2', END)
|
| 22 |
return workflow.compile(checkpointer=memory)
|
| 23 |
|
| 24 |
+
user_input_history = []
|
| 25 |
def orchestration_chat(user_input: str, image_base64=[]):
|
| 26 |
+
global user_input_history
|
| 27 |
+
user_input_history.append({'role': 'human', 'content': user_input})
|
| 28 |
+
|
| 29 |
if len(image_base64)>0:
|
| 30 |
caption_response = caption_image(image_base64, user_input)
|
| 31 |
+
user_input_history.append({'role': 'image_caption', 'content': caption_response})
|
| 32 |
+
|
| 33 |
print('Caption Response:', caption_response)
|
| 34 |
else:
|
| 35 |
caption_response =''
|
| 36 |
+
if len(user_input_history)>4:
|
| 37 |
+
user_input_history=user_input_history[-2:]
|
| 38 |
+
print('Length of history', len(user_input_history))
|
| 39 |
+
query_for_retrieval = ' '.join(
|
| 40 |
+
[msg['content'] for msg in user_input_history if msg['role'] in ('human', 'image_caption')]
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
influencers_data = retrieve_data_for_orchestration(query_for_retrieval)
|
| 44 |
|
| 45 |
agent = orchestration_graph()
|
| 46 |
config = {"configurable": {"thread_id": "orchestration-thread"}}
|
| 47 |
+
response = agent.invoke({"messages": [{'role':'human','content':user_input},
|
| 48 |
+
{'role': 'function', 'name': 'data_of_influencers', 'content': influencers_data},
|
| 49 |
+
{'role':'function','name':'information_of_image','content':caption_response}]}, config)['messages']
|
| 50 |
+
print('Orchestrator Response', response)
|
| 51 |
response=llm_gpt.with_structured_output(ValidationFormatter).invoke(extract_latest_response_block(response))
|
| 52 |
return response
|
| 53 |
|
orchestration_agent/utils/nodes.py
CHANGED
|
@@ -6,6 +6,8 @@ from .state import ToolResponseFormatter, UserReferenceResponseFormatter
|
|
| 6 |
|
| 7 |
|
| 8 |
def tool_return_node(state):
|
|
|
|
|
|
|
| 9 |
history = state["messages"]
|
| 10 |
template = [SystemMessage(content=tool_return_prompt)] + history
|
| 11 |
# print(template)
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def tool_return_node(state):
|
| 9 |
+
if len(state["messages"]) > 23:
|
| 10 |
+
state["messages"] = state["messages"][-18:]
|
| 11 |
history = state["messages"]
|
| 12 |
template = [SystemMessage(content=tool_return_prompt)] + history
|
| 13 |
# print(template)
|
orchestration_agent/utils/prompts.py
CHANGED
|
@@ -34,7 +34,7 @@ Your job is to:
|
|
| 34 |
1. **Read the user's message carefully** and identify their intent.
|
| 35 |
2. **Return exactly two things**:
|
| 36 |
- `tools`: a Python-style list of tool names (from the ordered list below) that match the user’s intent.
|
| 37 |
-
- `query_response`: a short, friendly, and helpful reply that aligns with the tools you've selected.
|
| 38 |
|
| 39 |
Your `tools` output must:
|
| 40 |
- Always be a **Python-style list**, even if only one tool is selected.
|
|
@@ -46,6 +46,7 @@ Your `query_response` must:
|
|
| 46 |
- Be aligned with the selected tools.
|
| 47 |
- Never say something is impossible if a tool exists for it.
|
| 48 |
|
|
|
|
| 49 |
---
|
| 50 |
|
| 51 |
### Available Tools (in execution order — use this order in your response):
|
|
@@ -99,14 +100,14 @@ Your `query_response` must:
|
|
| 99 |
|
| 100 |
### Influencer Assistant Note:
|
| 101 |
|
| 102 |
-
You're also an intelligent assistant for brands looking to collaborate with influencers. If the user asks about influencer suggestions, trends, or insights
|
| 103 |
|
| 104 |
---
|
| 105 |
|
| 106 |
### Output Format (always this exact format):
|
| 107 |
|
| 108 |
"tools": ["tool_1", "tool_2"], // or [] if nothing applies
|
| 109 |
-
"query_response": "Short, clear, and friendly message to the user"
|
| 110 |
|
| 111 |
"""
|
| 112 |
|
|
|
|
| 34 |
1. **Read the user's message carefully** and identify their intent.
|
| 35 |
2. **Return exactly two things**:
|
| 36 |
- `tools`: a Python-style list of tool names (from the ordered list below) that match the user’s intent.
|
| 37 |
+
- `query_response`: a short, friendly, and helpful reply that aligns with the tools you've selected. Also return the relevant influencer data that aligns with the user query, but only if influencer data has been provided to you.
|
| 38 |
|
| 39 |
Your `tools` output must:
|
| 40 |
- Always be a **Python-style list**, even if only one tool is selected.
|
|
|
|
| 46 |
- Be aligned with the selected tools.
|
| 47 |
- Never say something is impossible if a tool exists for it.
|
| 48 |
|
| 49 |
+
|
| 50 |
---
|
| 51 |
|
| 52 |
### Available Tools (in execution order — use this order in your response):
|
|
|
|
| 100 |
|
| 101 |
### Influencer Assistant Note:
|
| 102 |
|
| 103 |
+
You're also an intelligent assistant for brands looking to collaborate with influencers. If the user asks about influencer suggestions, trends, or insights, give detailed, useful replies using the influencer data provided to you.
|
| 104 |
|
| 105 |
---
|
| 106 |
|
| 107 |
### Output Format (always this exact format):
|
| 108 |
|
| 109 |
"tools": ["tool_1", "tool_2"], // or [] if nothing applies
|
| 110 |
+
"query_response": "Short, clear, and friendly message to the user. Give the data of influencers too if provided to you."
|
| 111 |
|
| 112 |
"""
|
| 113 |
|
orchestration_agent/utils/tools.py
CHANGED
|
@@ -4,25 +4,14 @@ import pandas as pd
|
|
| 4 |
import numpy as np
|
| 5 |
from utils.models_loader import ST
|
| 6 |
import json
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
def retrieve_tool(business_details):
|
| 10 |
'''
|
| 11 |
Always invoke this tool.
|
| 12 |
Retrieve influencer's data by semantic search of **business details**.
|
| 13 |
'''
|
| 14 |
-
|
| 15 |
-
csv_path = 'extracted_data.csv'
|
| 16 |
-
df = pd.read_csv(csv_path)
|
| 17 |
-
|
| 18 |
-
# === Parse stored embeddings ===
|
| 19 |
-
df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
|
| 20 |
-
embeddings = np.vstack(df['embeddings'].values).astype('float32')
|
| 21 |
-
|
| 22 |
-
# === Build FAISS index ===
|
| 23 |
-
dimension = embeddings.shape[1]
|
| 24 |
-
index = faiss.IndexFlatL2(dimension)
|
| 25 |
-
index.add(embeddings)
|
| 26 |
|
| 27 |
# === Encode the query and search ===
|
| 28 |
query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
|
|
@@ -42,4 +31,55 @@ def retrieve_tool(business_details):
|
|
| 42 |
|
| 43 |
return results
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
from utils.models_loader import ST
|
| 6 |
import json
|
| 7 |
+
from utils.load_embeddings import df, embeddings , index
|
| 8 |
|
| 9 |
+
def retrieve_data_for_analytics(business_details):
|
|
|
|
| 10 |
'''
|
| 11 |
Always invoke this tool.
|
| 12 |
Retrieve influencer's data by semantic search of **business details**.
|
| 13 |
'''
|
| 14 |
+
df = pd.read_csv('extracted_data.csv')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# === Encode the query and search ===
|
| 17 |
query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
|
|
|
|
| 31 |
|
| 32 |
return results
|
| 33 |
|
| 34 |
+
def retrieve_data_for_orchestration(query):
|
| 35 |
+
df = pd.read_csv('extracted_data.csv')
|
| 36 |
+
# === Encode and normalize query ===
|
| 37 |
+
query_embedding = ST.encode(str(query)).reshape(1, -1).astype('float32')
|
| 38 |
+
faiss.normalize_L2(query_embedding)
|
| 39 |
+
|
| 40 |
+
# === Search with high top_k to filter later ===
|
| 41 |
+
top_k = len(df)
|
| 42 |
+
distances, indices = index.search(query_embedding, top_k)
|
| 43 |
+
|
| 44 |
+
# === Filter by cosine-similarity threshold (0.60; scores are inner products of L2-normalized vectors) ===
|
| 45 |
+
similarity_threshold = 0.60
|
| 46 |
+
selected = [(idx, sim) for idx, sim in zip(indices[0], distances[0]) if sim >= similarity_threshold]
|
| 47 |
+
|
| 48 |
+
if not selected:
|
| 49 |
+
return "❌ No influencers found."
|
| 50 |
+
|
| 51 |
+
# === Format results ===
|
| 52 |
+
outer_list = []
|
| 53 |
+
for rank, (idx, sim) in enumerate(selected, 1):
|
| 54 |
+
row = df.iloc[idx]
|
| 55 |
+
res = {
|
| 56 |
+
'rank': rank,
|
| 57 |
+
'username': row['username'],
|
| 58 |
+
# 'story': row['story'],
|
| 59 |
+
'visible_text_or_brandings': row['visible_texts_or_brandings'],
|
| 60 |
+
'likesCount': row['likesCount'],
|
| 61 |
+
'commentCount': row['commentCount'],
|
| 62 |
+
'product_or_service_details': row['product_or_service_details'],
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
inner_list = [
|
| 66 |
+
f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**",
|
| 67 |
+
# f"The story of that particular video is:\n{res['story']}",
|
| 68 |
+
f"The branding or promotion done is:\n{res['visible_text_or_brandings']}",
|
| 69 |
+
f"The details of product or service is:\n{res['product_or_service_details']}"
|
| 70 |
+
]
|
| 71 |
+
outer_list.append(inner_list)
|
| 72 |
+
|
| 73 |
+
# === Flatten and tokenize all lines ===
|
| 74 |
+
flat_lines = [line for sublist in outer_list for line in sublist]
|
| 75 |
+
tokens = ' '.join(flat_lines).split()
|
| 76 |
+
|
| 77 |
+
# === If more than 1000 whitespace-separated words, keep only the first 1000 so the top-ranked entries survive ===
|
| 78 |
+
if len(tokens) > 1000:
|
| 79 |
+
tokens = tokens[:1000]
|
| 80 |
+
trimmed_response = ' '.join(tokens)
|
| 81 |
+
return trimmed_response
|
| 82 |
+
else:
|
| 83 |
+
return '\n\n'.join(flat_lines)
|
| 84 |
+
|
| 85 |
|
orchestration_agent/utils/utils.py
CHANGED
|
@@ -6,8 +6,7 @@ import os
|
|
| 6 |
from .prompts import captioning_prompt
|
| 7 |
from utils.models_loader import llm
|
| 8 |
from langchain_core.messages import FunctionMessage , AIMessage
|
| 9 |
-
from .tools import
|
| 10 |
-
from.prompts import show_analytics_prompt
|
| 11 |
|
| 12 |
|
| 13 |
def caption_image(image_base64,user_input):
|
|
@@ -41,12 +40,7 @@ def caption_image(image_base64,user_input):
|
|
| 41 |
|
| 42 |
|
| 43 |
def show_analytics(business_details):
|
| 44 |
-
|
| 45 |
-
tool_response = retrieve_tool(str(business_details))
|
| 46 |
-
# template = [SystemMessage(content=show_analytics_prompt()),
|
| 47 |
-
# HumanMessage(content=str(business_details)),
|
| 48 |
-
# ToolMessage(content=tool_response, tool_call_id='analytics_id')]
|
| 49 |
-
# response = llm.invoke(template).content
|
| 50 |
return tool_response
|
| 51 |
|
| 52 |
def extract_latest_response_block(response):
|
|
@@ -65,5 +59,5 @@ def extract_latest_response_block(response):
|
|
| 65 |
break
|
| 66 |
else:
|
| 67 |
temp_block = []
|
| 68 |
-
|
| 69 |
return latest_block
|
|
|
|
| 6 |
from .prompts import captioning_prompt
|
| 7 |
from utils.models_loader import llm
|
| 8 |
from langchain_core.messages import FunctionMessage , AIMessage
|
| 9 |
+
from .tools import retrieve_data_for_analytics
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
def caption_image(image_base64,user_input):
|
|
|
|
| 40 |
|
| 41 |
|
| 42 |
def show_analytics(business_details):
|
| 43 |
+
tool_response = retrieve_data_for_analytics(str(business_details))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
return tool_response
|
| 45 |
|
| 46 |
def extract_latest_response_block(response):
|
|
|
|
| 59 |
break
|
| 60 |
else:
|
| 61 |
temp_block = []
|
| 62 |
+
print('The latest block', latest_block)
|
| 63 |
return latest_block
|
requirements.txt
CHANGED
|
@@ -15,4 +15,7 @@ langmem
|
|
| 15 |
streamlit
|
| 16 |
requests
|
| 17 |
langchain_openai
|
|
|
|
|
|
|
|
|
|
| 18 |
|
|
|
|
| 15 |
streamlit
|
| 16 |
requests
|
| 17 |
langchain_openai
|
| 18 |
+
nltk
|
| 19 |
+
scikit-learn
|
| 20 |
+
pandas
|
| 21 |
|
utils/__pycache__/data_loader.cpython-312.pyc
CHANGED
|
Binary files a/utils/__pycache__/data_loader.cpython-312.pyc and b/utils/__pycache__/data_loader.cpython-312.pyc differ
|
|
|
utils/__pycache__/models_loader.cpython-312.pyc
CHANGED
|
Binary files a/utils/__pycache__/models_loader.cpython-312.pyc and b/utils/__pycache__/models_loader.cpython-312.pyc differ
|
|
|
utils/data_loader.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
|
| 2 |
from datasets import load_dataset
|
| 3 |
-
|
| 4 |
-
|
| 5 |
dataset = load_dataset("subashdvorak/tiktok-formatted-story-v2", revision="embedded")
|
| 6 |
data = dataset['train'].add_faiss_index('embeddings')
|
|
|
|
| 7 |
def load_influencer_data():
|
| 8 |
return data
|
| 9 |
|
|
|
|
| 1 |
|
| 2 |
from datasets import load_dataset
|
|
|
|
|
|
|
| 3 |
dataset = load_dataset("subashdvorak/tiktok-formatted-story-v2", revision="embedded")
|
| 4 |
data = dataset['train'].add_faiss_index('embeddings')
|
| 5 |
+
|
| 6 |
def load_influencer_data():
|
| 7 |
return data
|
| 8 |
|
utils/load_embeddings.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import ast
|
| 3 |
+
import faiss
|
| 4 |
+
import pandas as pd
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def load_index_once():
|
| 8 |
+
df = pd.read_csv('extracted_data.csv')
|
| 9 |
+
df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
|
| 10 |
+
embeddings = np.vstack(df['embeddings'].values).astype('float32')
|
| 11 |
+
faiss.normalize_L2(embeddings)
|
| 12 |
+
index = faiss.IndexFlatIP(embeddings.shape[1])
|
| 13 |
+
index.add(embeddings)
|
| 14 |
+
return df, embeddings, index
|
| 15 |
+
|
| 16 |
+
print('Loading Embeddings...........')
|
| 17 |
+
# Load once on script start
|
| 18 |
+
df, embeddings, index = load_index_once()
|
utils/models_loader.py
CHANGED
|
@@ -51,28 +51,14 @@ class HFEmbeddingAPI:
|
|
| 51 |
return np.array(embeddings[0]) if len(embeddings) == 1 else np.array(embeddings)
|
| 52 |
|
| 53 |
# Instantiate your API-backed "SentenceTransformer"
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
|
| 60 |
-
|
| 61 |
-
# Initiated the models for ideation
|
| 62 |
-
|
| 63 |
-
# ideator_llm = ChatGroq(
|
| 64 |
-
# model="llama-3.1-8b-instant",
|
| 65 |
-
# temperature=0.7,
|
| 66 |
-
# max_tokens=500,
|
| 67 |
|
| 68 |
-
# )
|
| 69 |
|
| 70 |
-
# critic_llm = ChatGroq(
|
| 71 |
-
# model="llama-3.3-70b-versatile",
|
| 72 |
-
# temperature=0.7,
|
| 73 |
-
# max_tokens=500,
|
| 74 |
|
| 75 |
-
# )
|
| 76 |
|
| 77 |
improver_llm = ChatOpenAI(
|
| 78 |
model="gpt-4o-mini",
|
|
@@ -80,21 +66,11 @@ improver_llm = ChatOpenAI(
|
|
| 80 |
max_tokens=500,
|
| 81 |
)
|
| 82 |
|
| 83 |
-
|
| 84 |
-
# model="llama3-8b-8192",
|
| 85 |
-
# temperature=0.7,
|
| 86 |
-
# max_tokens=500,
|
| 87 |
-
# )
|
| 88 |
ideator_llm = llm
|
| 89 |
critic_llm = llm
|
| 90 |
validator_llm = llm
|
| 91 |
|
| 92 |
|
| 93 |
-
# validator_llm = ChatGroq(
|
| 94 |
-
# model="llama-3.3-70b-versatile",
|
| 95 |
-
# temperature=0.7,
|
| 96 |
-
# max_tokens=500,
|
| 97 |
-
|
| 98 |
-
# )
|
| 99 |
|
| 100 |
|
|
|
|
| 51 |
return np.array(embeddings[0]) if len(embeddings) == 1 else np.array(embeddings)
|
| 52 |
|
| 53 |
# Instantiate your API-backed "SentenceTransformer"
|
| 54 |
+
ST = HFEmbeddingAPI(
|
| 55 |
+
api_url="https://router.huggingface.co/hf-inference/models/mixedbread-ai/mxbai-embed-large-v1/pipeline/feature-extraction",
|
| 56 |
+
token=os.environ.get('HUGGINGFACEHUB_ACCESS_TOKEN')
|
| 57 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
+
# ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
|
|
|
| 62 |
|
| 63 |
improver_llm = ChatOpenAI(
|
| 64 |
model="gpt-4o-mini",
|
|
|
|
| 66 |
max_tokens=500,
|
| 67 |
)
|
| 68 |
|
| 69 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
ideator_llm = llm
|
| 71 |
critic_llm = llm
|
| 72 |
validator_llm = llm
|
| 73 |
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
|