Spaces:

subashdvorak
/

trygithubactions

Sleeping

App Files Files Community

subashpoudel committed on May 19, 2025

Commit

563ce7c

1 Parent(s): 3e87e76

Added manual retrieval in utilities

Browse files

Files changed (1) hide show

my_agent/utils/utils.py +99 -6

my_agent/utils/utils.py CHANGED Viewed

@@ -10,7 +10,13 @@ from huggingface_hub import InferenceClient
 from .prompts import story_to_prompt , final_story_prompt
 import os
 from langgraph.prebuilt import create_react_agent
@@ -78,9 +84,96 @@ def generate_image(final_story):
     image.save('image.png')
     print('*****************Image Saved*******************')
     return "Image Created"
-    # try:
-    #     return image
-    # except:
-    #     return 'Image created'

 from .prompts import story_to_prompt , final_story_prompt
 import os
 from langgraph.prebuilt import create_react_agent
+import pandas as pd
+from datasets import load_dataset
+import ast
+import faiss
+import re
+import numpy as np
+from .models_loader import ST
     image.save('image.png')
     print('*****************Image Saved*******************')
     return "Image Created"
+def save_to_db(business_details):
+    dataset = load_dataset("subashdvorak/tiktok-agentic-story")['train']
+    # dataset = load_influencer_data()
+    df = pd.DataFrame(dataset)
+    # 2. Flatten all business detail values to a set of lowercase strings
+    all_values = set()
+    for v in business_details.values():
+        if isinstance(v, str):
+            all_values.add(v.lower())
+        elif isinstance(v, list):
+            all_values.update(map(str.lower, map(str, v)))
+    # 3. Match rows where ANY column contains ANY of the values
+    def row_matches(row):
+        return any(
+            str(cell).lower().find(val) != -1
+            for cell in row
+            for val in all_values
+        )
+    # 4. Apply row-wise matching
+    matched_df = df[df.apply(row_matches, axis=1)]
+    matched_df.to_csv('extracted_data.csv')
+def manual_retrieval(messages, business_details):
+    # === Load CSV ===
+    csv_path = 'extracted_data.csv'
+    df = pd.read_csv(csv_path)
+    # === Parse stored embeddings ===
+    df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
+    embeddings = np.vstack(df['embeddings'].values).astype('float32')
+    # === Build FAISS index ===
+    dimension = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dimension)
+    index.add(embeddings)
+    # === Load SentenceTransformer model ===
+    # === Encode the query and search ===
+    query_embedding = ST.encode(str(messages)+str(business_details)).reshape(1, -1).astype('float32')
+    top_k=3
+    distances, indices = index.search(query_embedding, top_k)
+    # === Function to extract sections 1 and 6 ===
+    def extract_story_and_branding(full_story):
+        full_story = full_story.replace('**6. Visible Texts or Brandings**', '**6. Visible Texts or Brandings:**')
+        full_story = full_story.replace('**1. Story**', '**1. Story:**')
+        pattern = (
+            r"\*\*1\. Story:\*\*(.*?)(?=\*\*\d+\.\s)"
+            r".*?"
+            r"\*\*6\. Visible Texts or Brandings:\*\*(.*?)(?=\*\*\d+\.\s|$)"
+        )
+        match = re.search(pattern, full_story, re.DOTALL)
+        if match:
+            story_section = match.group(1).strip()
+            branding_section = match.group(2).strip()
+            return f"Story:\n{story_section}\n\nVisible Texts or Brandings:\n{branding_section}"
+        else:
+            return "Requested sections not found."
+    # === Format results ===
+    outer_list = []
+    for i, idx in enumerate(indices[0]):
+        res = {
+            'rank': i + 1,
+            'username': df.iloc[idx]['username'],
+            'agentic_story': df.iloc[idx]['agentic_story'],
+            'likesCount': df.iloc[idx]['likesCount'],
+            'commentCount': df.iloc[idx]['commentCount'],
+            'distance': distances[0][i]
+        }
+        inner_list = []
+        inner_list.append(f"[{res['rank']}]. The influencer name is: **{res['username']}** — Likes: **{res['likesCount']}**, Comments: **{res['commentCount']}**")
+        inner_list.append(f"The story of that particular video is:\n{extract_story_and_branding(res['agentic_story'])}")
+        inner_list.append(f"Distance: {res['distance']:.4f}")
+        outer_list.append(inner_list)
+    return str(outer_list)