subashpoudel commited on
Commit
6c655a3
·
1 Parent(s): fbc17f4

Next commit

Browse files
__pycache__/main.cpython-312.pyc CHANGED
Binary files a/__pycache__/main.cpython-312.pyc and b/__pycache__/main.cpython-312.pyc differ
 
brainstroming_agent/utils/__pycache__/nodes.cpython-312.pyc CHANGED
Binary files a/brainstroming_agent/utils/__pycache__/nodes.cpython-312.pyc and b/brainstroming_agent/utils/__pycache__/nodes.cpython-312.pyc differ
 
brainstroming_agent/utils/__pycache__/tools.cpython-312.pyc CHANGED
Binary files a/brainstroming_agent/utils/__pycache__/tools.cpython-312.pyc and b/brainstroming_agent/utils/__pycache__/tools.cpython-312.pyc differ
 
brainstroming_agent/utils/__pycache__/utils.cpython-312.pyc CHANGED
Binary files a/brainstroming_agent/utils/__pycache__/utils.cpython-312.pyc and b/brainstroming_agent/utils/__pycache__/utils.cpython-312.pyc differ
 
brainstroming_agent/utils/nodes.py CHANGED
@@ -7,10 +7,8 @@ from utils.models_loader import llm , ST
7
  from utils.data_loader import load_influencer_data
8
  from groq import Groq
9
  import os
10
- from .prompts import image_captioning_prompt , initial_story_prompt , refined_story_prompt , brainstroming_prompt , final_story_prompt
11
  from langgraph.prebuilt import create_react_agent
12
- from pydantic import BaseModel , Field
13
- from langchain_core.tools import tool
14
  from .state import BrainstromTopicFormatter
15
 
16
 
@@ -75,9 +73,6 @@ def retrieve(state: State) -> State:
75
  return state
76
 
77
  def generate_story(state:State)-> State:
78
- retrievals_from_tool = retrieve_tool(state.idea)
79
- # tools=[retrieve_tool]
80
-
81
  react_agent=create_react_agent(
82
  model=llm,
83
  tools=[]
@@ -102,7 +97,6 @@ def generate_story(state:State)-> State:
102
  response = response['messages'][-1].content
103
  print('The genrated story: ', response)
104
  state.stories.append(response)
105
- # return State(messages="Story generated", topic=state.topic,stories=state.stories)
106
  return state
107
 
108
 
 
7
  from utils.data_loader import load_influencer_data
8
  from groq import Groq
9
  import os
10
+ from .prompts import image_captioning_prompt , initial_story_prompt , refined_story_prompt , brainstroming_prompt
11
  from langgraph.prebuilt import create_react_agent
 
 
12
  from .state import BrainstromTopicFormatter
13
 
14
 
 
73
  return state
74
 
75
  def generate_story(state:State)-> State:
 
 
 
76
  react_agent=create_react_agent(
77
  model=llm,
78
  tools=[]
 
97
  response = response['messages'][-1].content
98
  print('The genrated story: ', response)
99
  state.stories.append(response)
 
100
  return state
101
 
102
 
brainstroming_agent/utils/tools.py CHANGED
@@ -1,85 +1,53 @@
1
- from langchain_groq import ChatGroq
2
- from pydantic import BaseModel, Field
3
- from dotenv import load_dotenv
4
- load_dotenv()
5
  import os
6
  import numpy as np
7
- from langchain_core.tools import tool
8
- # from utils.data_loader import load_influencer_data
9
- from utils.models_loader import ST , llm
10
  import numpy as np
11
- from langchain_core.messages import SystemMessage
12
- import re
13
  import faiss
14
- import ast
15
- import pandas as pd
16
- from .state import QueryFormatter
17
- from utils.load_embeddings import embeddings , index
18
 
19
- os.environ['GROQ_API_KEY']=os.getenv('GROQ_API_KEY')
20
- # @tool("influencers_data_retrieval_tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
21
  def retrieve_tool(video_topic):
22
  '''
23
  Always invoke this tool.
24
  Retrieve influencer's data by semantic search of **video topic**.
25
  '''
26
- df = pd.read_csv('extracted_data.csv')
27
-
28
- query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')
29
- top_k=10
30
- distances, indices = index.search(query_embedding, top_k)
31
-
32
- # === Format results ===
33
- outer_list = []
34
- for i, idx in enumerate(indices[0]):
35
- res = {
36
- 'rank': i + 1,
37
- 'username': df.iloc[idx]['username'],
38
- 'story': df.iloc[idx]['story'],
39
- 'visible_text_or_brandings': df.iloc[idx]['visible_texts_or_brandings'],
40
- 'likesCount': df.iloc[idx]['likesCount'],
41
- 'commentCount': df.iloc[idx]['commentCount'],
42
- }
43
-
44
- inner_list = []
45
- inner_list.append(f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**")
46
- inner_list.append(f"The story of that particular video is:\n{res['story']}")
47
- inner_list.append(f"The branding or promotion done is:\n{res['visible_text_or_brandings']}")
48
 
49
- outer_list.append(inner_list)
50
-
51
- return str(outer_list)
52
-
53
- def retrieve_manual(video_topic):
54
- '''
55
- Always invoke this tool.
56
- Retrieve influencer's data by semantic search of **video topic**.
57
- '''
58
- df = pd.read_csv('extracted_data.csv')
59
- query_embedding = ST.encode(str(video_topic)).reshape(1, -1).astype('float32')
60
- top_k=5
61
  distances, indices = index.search(query_embedding, top_k)
62
 
 
 
63
 
 
 
64
 
65
  # === Format results ===
66
  outer_list = []
67
- for i, idx in enumerate(indices[0]):
 
68
  res = {
69
- 'rank': i + 1,
70
- 'username': df.iloc[idx]['username'],
71
- 'story': df.iloc[idx]['story'],
72
- 'visible_text_or_brandings': df.iloc[idx]['visible_texts_or_brandings'],
73
- 'likesCount': df.iloc[idx]['likesCount'],
74
- 'commentCount': df.iloc[idx]['commentCount'],
75
  }
76
 
77
- inner_list = []
78
- inner_list.append(f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**")
79
- inner_list.append(f"The story of that particular video is:\n{res['story']}")
80
- inner_list.append(f"The branding or promotion done is:\n{res['visible_text_or_brandings']}")
81
-
82
  outer_list.append(inner_list)
83
 
84
- return str(outer_list)
 
 
 
 
85
 
 
1
+
 
 
 
2
  import os
3
  import numpy as np
4
+ from utils.models_loader import embedding_model , llm
 
 
5
  import numpy as np
 
 
6
  import faiss
7
+ import tiktoken
8
+ from utils.load_embeddings import index , df
9
+ from utils.utils import clean_text
 
10
 
 
 
11
  def retrieve_tool(video_topic):
12
  '''
13
  Always invoke this tool.
14
  Retrieve influencer's data by semantic search of **video topic**.
15
  '''
16
+ query_embedding = np.array(embedding_model.embed_query(str(video_topic))).reshape(1, -1).astype('float32')
17
+ faiss.normalize_L2(query_embedding)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ top_k = len(df)
 
 
 
 
 
 
 
 
 
 
 
20
  distances, indices = index.search(query_embedding, top_k)
21
 
22
+ similarity_threshold = 0.35
23
+ selected = [(idx, sim) for idx, sim in zip(indices[0], distances[0]) if sim >= similarity_threshold]
24
 
25
+ if not selected:
26
+ return "No influencers found."
27
 
28
  # === Format results ===
29
  outer_list = []
30
+ for rank, (idx, sim) in enumerate(selected, 1):
31
+ row = df.iloc[idx]
32
  res = {
33
+ 'rank': rank,
34
+ 'username': row['username'],
35
+ 'visible_text_or_brandings': row['visible_texts_or_brandings'],
36
+ 'likesCount': row['likesCount'],
37
+ 'commentCount': row['commentCount'],
38
+ 'product_or_service_details': row['product_or_service_details'],
39
  }
40
 
41
+ inner_list = [
42
+ f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**",
43
+ f"The branding or promotion done is:\n{res['visible_text_or_brandings']}",
44
+ f"The details of product or service is:\n{res['product_or_service_details']}"
45
+ ]
46
  outer_list.append(inner_list)
47
 
48
+ cleaned_response = clean_text(str(outer_list))
49
+ encoding = tiktoken.encoding_for_model('gpt-4o-mini')
50
+ tokens = encoding.encode(cleaned_response)
51
+ trimmed_response = tokens[:1000]
52
+ return encoding.decode(trimmed_response)
53
 
brainstroming_agent/utils/utils.py CHANGED
@@ -1,6 +1,6 @@
1
 
2
  from langchain_core.messages import SystemMessage, ToolMessage, HumanMessage
3
- from .tools import retrieve_tool , retrieve_manual
4
  import base64
5
  from PIL import Image
6
  from io import BytesIO
@@ -11,14 +11,7 @@ import os
11
  from langgraph.prebuilt import create_react_agent
12
  import pandas as pd
13
  from datasets import load_dataset
14
- import ast
15
- import faiss
16
- import re
17
- import numpy as np
18
- from utils.models_loader import ST , llm
19
-
20
-
21
-
22
 
23
  def generate_final_story(final_state):
24
  if 'preferred_topics' in final_state:
@@ -41,7 +34,7 @@ def generate_final_story(final_state):
41
  return final_state['stories'][-1]
42
  else:
43
  template = final_story_prompt(final_state)
44
- influencers_data = retrieve_manual(final_state)
45
  messages = [SystemMessage(content=template),
46
  ToolMessage(content=f'''The business details is:\n{str(final_state)}\nThe data of influencers is:\n{influencers_data}''',tool_call_id='final_story_tool')]
47
  react_agent=create_react_agent(
@@ -130,64 +123,7 @@ def save_to_db(business_details):
130
  matched_df = df[df.apply(row_matches, axis=1)]
131
  matched_df.to_csv('extracted_data.csv')
132
 
133
- def manual_retrieval(messages, business_details):
134
- # === Load CSV ===
135
- csv_path = 'extracted_data.csv'
136
- df = pd.read_csv(csv_path)
137
-
138
- # === Parse stored embeddings ===
139
- df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
140
- embeddings = np.vstack(df['embeddings'].values).astype('float32')
141
-
142
- # === Build FAISS index ===
143
- dimension = embeddings.shape[1]
144
- index = faiss.IndexFlatL2(dimension)
145
- index.add(embeddings)
146
 
147
- # === Load SentenceTransformer model ===
148
-
149
- # === Encode the query and search ===
150
- query_embedding = ST.encode(str(messages)+str(business_details)).reshape(1, -1).astype('float32')
151
- top_k=3
152
- distances, indices = index.search(query_embedding, top_k)
153
-
154
- # === Function to extract sections 1 and 6 ===
155
- def extract_story_and_branding(full_story):
156
- full_story = full_story.replace('**6. Visible Texts or Brandings**', '**6. Visible Texts or Brandings:**')
157
- full_story = full_story.replace('**1. Story**', '**1. Story:**')
158
-
159
- pattern = (
160
- r"\*\*1\. Story:\*\*(.*?)(?=\*\*\d+\.\s)"
161
- r".*?"
162
- r"\*\*6\. Visible Texts or Brandings:\*\*(.*?)(?=\*\*\d+\.\s|$)"
163
- )
164
- match = re.search(pattern, full_story, re.DOTALL)
165
- if match:
166
- story_section = match.group(1).strip()
167
- branding_section = match.group(2).strip()
168
- return f"Story:\n{story_section}\n\nVisible Texts or Brandings:\n{branding_section}"
169
- else:
170
- return "Requested sections not found."
171
-
172
- # === Format results ===
173
- outer_list = []
174
- for i, idx in enumerate(indices[0]):
175
- res = {
176
- 'rank': i + 1,
177
- 'username': df.iloc[idx]['username'],
178
- 'agentic_story': df.iloc[idx]['agentic_story'],
179
- 'likesCount': df.iloc[idx]['likesCount'],
180
- 'commentCount': df.iloc[idx]['commentCount'],
181
- 'distance': distances[0][i]
182
- }
183
-
184
- inner_list = []
185
- inner_list.append(f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**")
186
- inner_list.append(f"The story of that particular video is:\n{extract_story_and_branding(res['agentic_story'])}")
187
- inner_list.append(f"Distance: {res['distance']:.4f}")
188
- outer_list.append(inner_list)
189
-
190
- return str(outer_list)
191
 
192
 
193
 
 
1
 
2
  from langchain_core.messages import SystemMessage, ToolMessage, HumanMessage
3
+ from .tools import retrieve_tool
4
  import base64
5
  from PIL import Image
6
  from io import BytesIO
 
11
  from langgraph.prebuilt import create_react_agent
12
  import pandas as pd
13
  from datasets import load_dataset
14
+ from utils.models_loader import llm
 
 
 
 
 
 
 
15
 
16
  def generate_final_story(final_state):
17
  if 'preferred_topics' in final_state:
 
34
  return final_state['stories'][-1]
35
  else:
36
  template = final_story_prompt(final_state)
37
+ influencers_data = retrieve_tool(final_state)
38
  messages = [SystemMessage(content=template),
39
  ToolMessage(content=f'''The business details is:\n{str(final_state)}\nThe data of influencers is:\n{influencers_data}''',tool_call_id='final_story_tool')]
40
  react_agent=create_react_agent(
 
123
  matched_df = df[df.apply(row_matches, axis=1)]
124
  matched_df.to_csv('extracted_data.csv')
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
 
129
 
ideation_agent/utils/__pycache__/tools.cpython-312.pyc CHANGED
Binary files a/ideation_agent/utils/__pycache__/tools.cpython-312.pyc and b/ideation_agent/utils/__pycache__/tools.cpython-312.pyc differ
 
ideation_agent/utils/tools.py CHANGED
@@ -5,8 +5,10 @@ import pandas as pd
5
  import numpy as np
6
  import ast
7
  import faiss
8
- from utils.models_loader import ST
9
- from utils.load_embeddings import embeddings , index
 
 
10
 
11
  @tool("influencers_data_retrieval_tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
12
  def retrieve_tool(business_details):
@@ -14,34 +16,40 @@ def retrieve_tool(business_details):
14
  Always invoke this tool.
15
  Retrieve influencer's data by semantic search of **business details**.
16
  '''
17
- # === Load CSV ===
18
- csv_path = 'extracted_data.csv'
19
- df = pd.read_csv(csv_path)
20
 
21
-
22
- query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
23
- top_k=7
24
  distances, indices = index.search(query_embedding, top_k)
25
 
 
 
 
 
 
 
26
  # === Format results ===
27
  outer_list = []
28
- for i, idx in enumerate(indices[0]):
 
29
  res = {
30
- 'rank': i + 1,
31
- 'username': df.iloc[idx]['username'],
32
- 'story': df.iloc[idx]['story'],
33
- 'visible_text_or_brandings': df.iloc[idx]['visible_texts_or_brandings'],
34
- 'likesCount': df.iloc[idx]['likesCount'],
35
- 'commentCount': df.iloc[idx]['commentCount'],
36
- 'product_or_service_details': df.iloc[idx]['product_or_service_details'],
37
-
38
  }
39
 
40
- inner_list = []
41
- inner_list.append(f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**")
42
- inner_list.append(f"The story of that particular video is:\n{res['story']}")
43
- inner_list.append(f"The branding or promotion done is:\n{res['visible_text_or_brandings']}")
44
- inner_list.append(f"The details of product or service is:\n{res['product_or_service_details']}")
45
  outer_list.append(inner_list)
46
 
47
- return str(outer_list)
 
 
 
 
 
5
  import numpy as np
6
  import ast
7
  import faiss
8
+ import tiktoken
9
+ from utils.models_loader import embedding_model
10
+ from utils.load_embeddings import embeddings , index , df
11
+ from utils.utils import clean_text
12
 
13
  @tool("influencers_data_retrieval_tool", args_schema=QueryFormatter, return_direct=False,description="Retrieve influencer-related data for a given query.")
14
  def retrieve_tool(business_details):
 
16
  Always invoke this tool.
17
  Retrieve influencer's data by semantic search of **business details**.
18
  '''
19
+ query_embedding = np.array(embedding_model.embed_query(str(business_details))).reshape(1, -1).astype('float32')
20
+ faiss.normalize_L2(query_embedding)
 
21
 
22
+ top_k = len(df)
 
 
23
  distances, indices = index.search(query_embedding, top_k)
24
 
25
+ similarity_threshold = 0.35
26
+ selected = [(idx, sim) for idx, sim in zip(indices[0], distances[0]) if sim >= similarity_threshold]
27
+
28
+ if not selected:
29
+ return "No influencers found."
30
+
31
  # === Format results ===
32
  outer_list = []
33
+ for rank, (idx, sim) in enumerate(selected, 1):
34
+ row = df.iloc[idx]
35
  res = {
36
+ 'rank': rank,
37
+ 'username': row['username'],
38
+ 'visible_text_or_brandings': row['visible_texts_or_brandings'],
39
+ 'likesCount': row['likesCount'],
40
+ 'commentCount': row['commentCount'],
41
+ 'product_or_service_details': row['product_or_service_details'],
 
 
42
  }
43
 
44
+ inner_list = [
45
+ f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**",
46
+ f"The branding or promotion done is:\n{res['visible_text_or_brandings']}",
47
+ f"The details of product or service is:\n{res['product_or_service_details']}"
48
+ ]
49
  outer_list.append(inner_list)
50
 
51
+ cleaned_response = clean_text(str(outer_list))
52
+ encoding = tiktoken.encoding_for_model('gpt-4o-mini')
53
+ tokens = encoding.encode(cleaned_response)
54
+ trimmed_response = tokens[:1000]
55
+ return encoding.decode(trimmed_response)
orchestration_agent/utils/tools.py CHANGED
@@ -2,51 +2,50 @@ import faiss
2
  import ast
3
  import pandas as pd
4
  import numpy as np
5
- from utils.models_loader import ST
6
- import json
7
  from utils.load_embeddings import df, embeddings , index
 
 
 
8
 
9
  def retrieve_data_for_analytics(business_details):
10
  '''
11
  Always invoke this tool.
12
  Retrieve influencer's data by semantic search of **business details**.
13
  '''
14
- df = pd.read_csv('extracted_data.csv')
15
 
16
  # === Encode the query and search ===
17
- query_embedding = ST.encode(str(business_details)).reshape(1, -1).astype('float32')
18
- top_k = 30
19
  distances, indices = index.search(query_embedding, top_k)
20
 
21
  # === Format results ===
22
  results = []
23
  for i, idx in enumerate(indices[0]):
 
 
24
  res = {
25
  'url': df.iloc[idx]['videoUrl'],
26
  'username': df.iloc[idx]['username'],
27
- 'likesCount': int(df.iloc[idx]['likesCount']),
28
- 'commentCount': int(df.iloc[idx]['commentCount'])
29
  }
30
  results.append(res)
31
 
32
  return results
33
 
34
  def retrieve_data_for_orchestration(query):
35
- df = pd.read_csv('extracted_data.csv')
36
- # === Encode and normalize query ===
37
- query_embedding = ST.encode(str(query)).reshape(1, -1).astype('float32')
38
  faiss.normalize_L2(query_embedding)
39
 
40
- # === Search with high top_k to filter later ===
41
  top_k = len(df)
42
  distances, indices = index.search(query_embedding, top_k)
43
 
44
- # === Filter by similarity threshold (e.g., 0.70) ===
45
- similarity_threshold = 0.60
46
  selected = [(idx, sim) for idx, sim in zip(indices[0], distances[0]) if sim >= similarity_threshold]
47
 
48
  if not selected:
49
- return "No influencers found."
50
 
51
  # === Format results ===
52
  outer_list = []
@@ -55,7 +54,6 @@ def retrieve_data_for_orchestration(query):
55
  res = {
56
  'rank': rank,
57
  'username': row['username'],
58
- # 'story': row['story'],
59
  'visible_text_or_brandings': row['visible_texts_or_brandings'],
60
  'likesCount': row['likesCount'],
61
  'commentCount': row['commentCount'],
@@ -64,22 +62,17 @@ def retrieve_data_for_orchestration(query):
64
 
65
  inner_list = [
66
  f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**",
67
- # f"The story of that particular video is:\n{res['story']}",
68
  f"The branding or promotion done is:\n{res['visible_text_or_brandings']}",
69
  f"The details of product or service is:\n{res['product_or_service_details']}"
70
  ]
71
  outer_list.append(inner_list)
72
 
73
- # === Flatten and tokenize all lines ===
74
- flat_lines = [line for sublist in outer_list for line in sublist]
75
- tokens = ' '.join(flat_lines).split()
 
 
 
76
 
77
- # === If >1000 tokens, trim from back, keeping top ranks ===
78
- if len(tokens) > 1000:
79
- tokens = tokens[:1000]
80
- trimmed_response = ' '.join(tokens)
81
- return trimmed_response
82
- else:
83
- return '\n\n'.join(flat_lines)
84
 
85
 
 
2
  import ast
3
  import pandas as pd
4
  import numpy as np
 
 
5
  from utils.load_embeddings import df, embeddings , index
6
+ from utils.models_loader import embedding_model
7
+ from utils.utils import clean_text
8
+ import tiktoken
9
 
10
  def retrieve_data_for_analytics(business_details):
11
  '''
12
  Always invoke this tool.
13
  Retrieve influencer's data by semantic search of **business details**.
14
  '''
15
+ # df = pd.read_csv('extracted_data.csv')
16
 
17
  # === Encode the query and search ===
18
+ query_embedding = np.array(embedding_model.embed_query(str(business_details))).reshape(1, -1).astype('float32')
19
+ top_k = 10
20
  distances, indices = index.search(query_embedding, top_k)
21
 
22
  # === Format results ===
23
  results = []
24
  for i, idx in enumerate(indices[0]):
25
+ likes = df.iloc[idx]['likesCount']
26
+ comments = df.iloc[idx]['commentCount']
27
  res = {
28
  'url': df.iloc[idx]['videoUrl'],
29
  'username': df.iloc[idx]['username'],
30
+ 'likesCount': int(likes) if pd.notnull(likes) else None,
31
+ 'commentCount': int(comments) if pd.notnull(comments) else None
32
  }
33
  results.append(res)
34
 
35
  return results
36
 
37
  def retrieve_data_for_orchestration(query):
38
+ query_embedding = np.array(embedding_model.embed_query(str(query))).reshape(1, -1).astype('float32')
 
 
39
  faiss.normalize_L2(query_embedding)
40
 
 
41
  top_k = len(df)
42
  distances, indices = index.search(query_embedding, top_k)
43
 
44
+ similarity_threshold = 0.35
 
45
  selected = [(idx, sim) for idx, sim in zip(indices[0], distances[0]) if sim >= similarity_threshold]
46
 
47
  if not selected:
48
+ return "No influencers found."
49
 
50
  # === Format results ===
51
  outer_list = []
 
54
  res = {
55
  'rank': rank,
56
  'username': row['username'],
 
57
  'visible_text_or_brandings': row['visible_texts_or_brandings'],
58
  'likesCount': row['likesCount'],
59
  'commentCount': row['commentCount'],
 
62
 
63
  inner_list = [
64
  f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**",
 
65
  f"The branding or promotion done is:\n{res['visible_text_or_brandings']}",
66
  f"The details of product or service is:\n{res['product_or_service_details']}"
67
  ]
68
  outer_list.append(inner_list)
69
 
70
+ cleaned_response = clean_text(str(outer_list))
71
+ encoding = tiktoken.encoding_for_model('gpt-4o-mini')
72
+ tokens = encoding.encode(cleaned_response)
73
+ trimmed_response = tokens[:1000]
74
+ return encoding.decode(trimmed_response)
75
+
76
 
 
 
 
 
 
 
 
77
 
78
 
orchestration_agent/utils/utils.py CHANGED
@@ -7,6 +7,7 @@ from .prompts import captioning_prompt
7
  from utils.models_loader import llm
8
  from langchain_core.messages import FunctionMessage , AIMessage
9
  from .tools import retrieve_data_for_analytics
 
10
 
11
 
12
  def caption_image(image_base64,user_input):
@@ -61,3 +62,5 @@ def extract_latest_response_block(response):
61
  temp_block = []
62
  print('The latest block', latest_block)
63
  return latest_block
 
 
 
7
  from utils.models_loader import llm
8
  from langchain_core.messages import FunctionMessage , AIMessage
9
  from .tools import retrieve_data_for_analytics
10
+ import re
11
 
12
 
13
  def caption_image(image_base64,user_input):
 
62
  temp_block = []
63
  print('The latest block', latest_block)
64
  return latest_block
65
+
66
+
requirements.txt CHANGED
@@ -18,4 +18,7 @@ langchain_openai
18
  nltk
19
  scikit-learn
20
  pandas
 
 
 
21
 
 
18
  nltk
19
  scikit-learn
20
  pandas
21
+ langchain-community
22
+ tiktoken
23
+ langchain-anthropic
24
 
utils/__init__.py ADDED
File without changes
utils/__pycache__/data_loader.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/data_loader.cpython-312.pyc and b/utils/__pycache__/data_loader.cpython-312.pyc differ
 
utils/__pycache__/models_loader.cpython-312.pyc CHANGED
Binary files a/utils/__pycache__/models_loader.cpython-312.pyc and b/utils/__pycache__/models_loader.cpython-312.pyc differ
 
utils/data_loader.py CHANGED
@@ -1,9 +1,11 @@
1
 
 
2
  from datasets import load_dataset
3
- dataset = load_dataset("subashdvorak/tiktok-formatted-story-v2", revision="embedded")
4
- data = dataset['train'].add_faiss_index('embeddings')
5
 
6
  def load_influencer_data():
7
- return data
 
8
 
9
 
 
1
 
2
+ print('Loading Dataset..................')
3
  from datasets import load_dataset
4
+ # dataset = load_dataset("subashdvorak/tiktok-formatted-story-v2", revision="embedded")
5
+ # data = dataset['train'].add_faiss_index('embeddings')
6
 
7
  def load_influencer_data():
8
+ return 'Some error occouring'
9
+ print('Dataset loaded.................')
10
 
11
 
utils/load_embeddings.py CHANGED
@@ -2,10 +2,13 @@ import numpy as np
2
  import ast
3
  import faiss
4
  import pandas as pd
 
5
 
6
 
7
  def load_index_once():
8
- df = pd.read_csv('extracted_data.csv')
 
 
9
  df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
10
  embeddings = np.vstack(df['embeddings'].values).astype('float32')
11
  faiss.normalize_L2(embeddings)
 
2
  import ast
3
  import faiss
4
  import pandas as pd
5
+ from datasets import load_dataset
6
 
7
 
8
  def load_index_once():
9
+ dataset = load_dataset("DvorakInnovationAI/rt-genai-dataset-v1", revision="openai-embeddings")
10
+ df = dataset["train"]
11
+ df= df.to_pandas()
12
  df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
13
  embeddings = np.vstack(df['embeddings'].values).astype('float32')
14
  faiss.normalize_L2(embeddings)
utils/models_loader.py CHANGED
@@ -1,28 +1,25 @@
 
 
 
1
  from langchain_groq import ChatGroq
2
  from langchain_openai import ChatOpenAI
3
  from langchain_google_genai import ChatGoogleGenerativeAI
4
-
5
- from sentence_transformers import SentenceTransformer
6
- # from huggingface_hub import InferenceClient
7
  from huggingface_hub import login
8
  from dotenv import load_dotenv
9
  load_dotenv()
10
- import os
11
- import requests
12
- import numpy as np
13
- # from langchain_huggingface import HuggingFaceEndpoint
14
  os.environ['HUGGINGFACEHUB_ACCESS_TOKEN']=os.getenv('HUGGINGFACEHUB_ACCESS_TOKEN')
15
  login(os.environ['HUGGINGFACEHUB_ACCESS_TOKEN'])
16
  os.environ['GROQ_API_KEY']=os.getenv('GROQ_API_KEY')
17
 
18
-
19
-
20
  llm_gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
21
 
22
  llm = ChatGroq(
23
  model="llama-3.1-8b-instant",
24
  temperature=0.7,
25
-
26
  )
27
 
28
  llm_gpt = ChatOpenAI(
@@ -56,20 +53,10 @@ ST = HFEmbeddingAPI(
56
  token=os.environ.get('HUGGINGFACEHUB_ACCESS_TOKEN')
57
  )
58
 
59
- # ST = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
60
-
61
-
62
-
63
- improver_llm = ChatOpenAI(
64
- model="gpt-4o-mini",
65
- temperature=0.7,
66
- max_tokens=500,
67
- )
68
-
69
-
70
- ideator_llm = llm
71
- critic_llm = llm
72
- validator_llm = llm
73
 
74
 
75
 
 
1
+ import os
2
+ import requests
3
+ import numpy as np
4
  from langchain_groq import ChatGroq
5
  from langchain_openai import ChatOpenAI
6
  from langchain_google_genai import ChatGoogleGenerativeAI
7
+ from langchain_anthropic import ChatAnthropic
8
+ from langchain_openai import OpenAIEmbeddings
 
9
  from huggingface_hub import login
10
  from dotenv import load_dotenv
11
  load_dotenv()
 
 
 
 
12
  os.environ['HUGGINGFACEHUB_ACCESS_TOKEN']=os.getenv('HUGGINGFACEHUB_ACCESS_TOKEN')
13
  login(os.environ['HUGGINGFACEHUB_ACCESS_TOKEN'])
14
  os.environ['GROQ_API_KEY']=os.getenv('GROQ_API_KEY')
15
 
16
+ embedding_model = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=1536)
17
+ llm_anthropic = ChatAnthropic(model='claude-3-5-sonnet-20241022', temperature=0.7, max_tokens=500)
18
  llm_gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash")
19
 
20
  llm = ChatGroq(
21
  model="llama-3.1-8b-instant",
22
  temperature=0.7,
 
23
  )
24
 
25
  llm_gpt = ChatOpenAI(
 
53
  token=os.environ.get('HUGGINGFACEHUB_ACCESS_TOKEN')
54
  )
55
 
56
+ improver_llm = llm_anthropic
57
+ ideator_llm = llm_anthropic
58
+ critic_llm = llm_anthropic
59
+ validator_llm = llm_anthropic
 
 
 
 
 
 
 
 
 
 
60
 
61
 
62
 
utils/utils.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ def clean_text(text: str) -> str:
3
+ """
4
+ General-purpose text cleaner for LLMs or downstream NLP tasks.
5
+ Removes special characters, escape sequences, excess spaces, and normalizes punctuation.
6
+ """
7
+
8
+ if not isinstance(text, str):
9
+ return ""
10
+
11
+ # Normalize encoded newlines and tabs
12
+ text = text.replace("\\n", "\n").replace("\\t", " ")
13
+
14
+ # Remove stray backslashes (\\), unless part of newline
15
+ text = re.sub(r"\\(?!n)", '', text)
16
+
17
+ # Remove brackets often used for metadata or markup
18
+ text = re.sub(r'[\[\]{}<>]', '', text)
19
+
20
+ # Remove quotes
21
+ text = re.sub(r"[\"']", '', text)
22
+
23
+ # Remove special characters except basic punctuation (.,!?)
24
+ text = re.sub(r"[^a-zA-Z0-9.,!? \n]", '', text)
25
+
26
+ # Remove repeated punctuation like "!!!" or "???"
27
+ text = re.sub(r'([!?.,]){2,}', r'\1', text)
28
+
29
+ # Normalize multiple spaces and newlines
30
+ text = re.sub(r'[ \t]+', ' ', text)
31
+ text = re.sub(r'\n{3,}', '\n\n', text) # Collapse more than 2 newlines to just 2
32
+ text = re.sub(r' *\n *', '\n', text) # Clean spaces around newlines
33
+
34
+ return text.strip()