File size: 5,592 Bytes
946d35b
 
93a5bf9
946d35b
 
 
 
708437f
efe9a51
72a7f4f
96b0973
563ce7c
 
 
 
 
 
93a5bf9
96b0973
 
708437f
efe9a51
 
 
946d35b
96b0973
 
 
 
 
 
 
 
946d35b
 
 
efe9a51
946d35b
 
 
 
 
 
 
 
 
 
708437f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32131c3
708437f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563ce7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

from langchain_core.messages import SystemMessage
from .tools import retrieve_tool
import base64
from PIL import Image
from io import BytesIO
from fastapi import UploadFile
from huggingface_hub import InferenceClient
from .prompts import story_to_prompt , final_story_prompt
import os
from langgraph.prebuilt import create_react_agent
import pandas as pd
from datasets import load_dataset
import ast
import faiss
import re
import numpy as np
from utils.models_loader import ST , llm



def generate_final_story(final_state):
    """Produce the final story for ``final_state``.

    If ``final_state['preferred_topics']`` is empty, the last entry of
    ``final_state['stories']`` is returned as-is. Otherwise a system prompt
    is built with ``final_story_prompt`` and passed to a ReAct agent that has
    ``retrieve_tool`` available; the content of the agent's final message is
    returned.
    """
    # Guard clause: with no preferred topics, fall back to the latest story.
    if len(final_state['preferred_topics']) == 0:
        return final_state['stories'][-1]

    system_prompt = SystemMessage(content=final_story_prompt(final_state))

    agent_tools = [retrieve_tool]
    agent = create_react_agent(
        model=llm.bind_tools(agent_tools),
        tools=agent_tools,
    )

    agent_output = agent.invoke({'messages': [system_prompt]})
    return agent_output['messages'][-1].content



def encode_image_to_base64(uploaded_file: UploadFile) -> str:
    """Return the bytes read from ``uploaded_file.file`` as a base64 string."""
    raw_bytes = uploaded_file.file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
  

def process_image(base64_str: str) -> Image.Image:
    """Decode a base64 string and open the resulting bytes as a PIL image.

    Optional helper for LangGraph processing.
    """
    buffer = BytesIO(base64.b64decode(base64_str))
    return Image.open(buffer)


def generate_prompt(final_story):
    """Ask the LLM to turn ``final_story`` into a prompt.

    ``story_to_prompt`` is sent as the system message and ``final_story`` as
    the human message. The raw LLM response is printed and its ``content``
    is returned.
    """
    print('************Entering prompt generator****************')
    system_turn = ("system", story_to_prompt)
    human_turn = ("human", final_story)
    llm_reply = llm.invoke([system_turn, human_turn])
    print('The prompt is:',llm_reply)
    return llm_reply.content

def generate_image(final_story):
    """Generate an image for ``final_story`` and save it as ``image.png``.

    The story is first converted to a prompt with ``generate_prompt``; that
    prompt is sent to the ``black-forest-labs/FLUX.1-schnell`` model through
    a Hugging Face ``InferenceClient`` (provider ``hf-inference``)
    authenticated with the ``HUGGINGFACEHUB_ACCESS_TOKEN`` environment
    variable. Progress markers are printed along the way.

    Returns:
        The literal string ``"Image Created"``.
    """
    image_prompt = generate_prompt(final_story)
    print('************Finished prompt generator****************')

    hf_token = os.environ.get('HUGGINGFACEHUB_ACCESS_TOKEN')
    hf_client = InferenceClient(provider="hf-inference", api_key=hf_token)

    print('************Finished calling generator****************')

    # output is a PIL.Image object
    generated = hf_client.text_to_image(
        image_prompt,
        model="black-forest-labs/FLUX.1-schnell",
    )
    print('*****************Image Created*******************')

    generated.save('image.png')
    print('*****************Image Saved*******************')
    return "Image Created"


def save_to_db(business_details):
    """Filter the story dataset by business details and cache it as a CSV.

    Loads the ``train`` split of ``subashdvorak/tiktok-agentic-story``, keeps
    every row in which at least one cell contains (case-insensitively) one of
    the values found in ``business_details``, and writes the kept rows to
    ``extracted_data.csv`` in the current working directory.

    Args:
        business_details: Mapping whose values are strings or
            lists/tuples/sets of items (items are converted with ``str``).
            Values of any other type are ignored. Blank strings are skipped:
            an empty search term is a substring of every cell and would
            otherwise make every row match.

    Returns:
        None. The result is the CSV file written as a side effect.
    """
    dataset = load_dataset("subashdvorak/tiktok-agentic-story")['train']
    df = pd.DataFrame(dataset)

    # Flatten all business detail values into a set of lowercase search terms.
    search_terms = set()
    for value in business_details.values():
        if isinstance(value, str):
            candidates = [value]
        elif isinstance(value, (list, tuple, set)):
            candidates = [str(item) for item in value]
        else:
            continue
        search_terms.update(
            term.lower() for term in candidates if term.strip()
        )

    def row_matches(row):
        """Return True if any cell of ``row`` contains any search term."""
        for cell in row:
            # Lower-case each cell once rather than once per search term.
            cell_text = str(cell).lower()
            if any(term in cell_text for term in search_terms):
                return True
        return False

    # Row-wise matching; only rows with at least one hit are kept.
    matched_df = df[df.apply(row_matches, axis=1)]
    matched_df.to_csv('extracted_data.csv')

_STORY_AND_BRANDING_RE = re.compile(
    r"\*\*1\. Story:\*\*(.*?)(?=\*\*\d+\.\s)"
    r".*?"
    r"\*\*6\. Visible Texts or Brandings:\*\*(.*?)(?=\*\*\d+\.\s|$)",
    re.DOTALL,
)


def _extract_story_and_branding(full_story):
    """Return sections 1 (Story) and 6 (Visible Texts or Brandings) of a story.

    Returns ``"Requested sections not found."`` when ``full_story`` is not a
    string or does not contain both section headings.
    """
    if not isinstance(full_story, str):
        # An empty CSV cell is read back by pandas as NaN (a float).
        return "Requested sections not found."

    # Normalise headings written without the trailing colon.
    full_story = full_story.replace('**6. Visible Texts or Brandings**', '**6. Visible Texts or Brandings:**')
    full_story = full_story.replace('**1. Story**', '**1. Story:**')

    match = _STORY_AND_BRANDING_RE.search(full_story)
    if match is None:
        return "Requested sections not found."

    story_section = match.group(1).strip()
    branding_section = match.group(2).strip()
    return f"Story:\n{story_section}\n\nVisible Texts or Brandings:\n{branding_section}"


def manual_retrieval(messages, business_details):
    """Return the stories in ``extracted_data.csv`` closest to the query.

    The stored ``embeddings`` column is parsed and loaded into a FAISS
    ``IndexFlatL2``. The query is the string form of ``messages``
    concatenated with the string form of ``business_details``, encoded with
    ``ST``. Up to three nearest rows are reported.

    Args:
        messages: Conversation content; only ``str(messages)`` is used.
        business_details: Business description; only its ``str`` form is used.

    Returns:
        ``str`` of a list with one entry per hit. Each entry is a list of
        three strings: a header line (rank, username, like and comment
        counts), the extracted story/branding sections, and the distance.
        ``"[]"`` is returned when the CSV holds no rows.
    """
    # === Load CSV ===
    csv_path = 'extracted_data.csv'
    df = pd.read_csv(csv_path)
    if df.empty:
        # Nothing to index; np.vstack would raise on an empty sequence.
        return str([])

    # === Parse stored embeddings ===
    df['embeddings'] = df['embeddings'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    embeddings = np.vstack(df['embeddings'].values).astype('float32')

    # === Build FAISS index ===
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # === Encode the query and search ===
    query_embedding = ST.encode(str(messages)+str(business_details)).reshape(1, -1).astype('float32')
    # Never request more neighbours than there are rows: FAISS pads missing
    # results with label -1, which df.iloc would read as "last row".
    top_k = min(3, len(df))
    distances, indices = index.search(query_embedding, top_k)

    # === Format results ===
    outer_list = []
    for rank, (idx, distance) in enumerate(zip(indices[0], distances[0]), start=1):
        if idx < 0:
            continue  # padding label, no real neighbour
        row = df.iloc[idx]
        outer_list.append([
            f"[{rank}]. The influencer name is: **{row['username']}** — Likes: **{row['likesCount']}**, Comments: **{row['commentCount']}**",
            f"The story of that particular video is:\n{_extract_story_and_branding(row['agentic_story'])}",
            f"Distance: {distance:.4f}",
        ])

    return str(outer_list)