File size: 2,182 Bytes
3a3fe92
 
 
 
 
 
 
6c655a3
583f6dd
 
 
3a3fe92
a6ebaaf
9acd478
3a3fe92
 
9acd478
3a3fe92
6c655a3
 
3a3fe92
6c655a3
3a3fe92
 
6c655a3
 
 
 
 
 
3a3fe92
 
6c655a3
 
3a3fe92
6c655a3
 
 
 
 
 
3a3fe92
 
6c655a3
 
 
 
 
3a3fe92
 
6c655a3
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55

from langchain_core.tools import tool
from .state import QueryFormatter
import pandas as pd
import numpy as np
import ast
import faiss
import tiktoken
from src.genai.utils.models_loader import embedding_model
from src.genai.utils.load_embeddings import embeddings , index , df
from src.genai.utils.utils import clean_text

@tool(
    "influencers_data_retrieval_tool",
    args_schema=QueryFormatter,
    return_direct=False,
    description="Retrieve influencer-related data for a given query.",
)
def retrieve_tool(business_details):
    '''
    Always invoke this tool.
    Retrieve influencer's data by semantic search of **business details**.

    Args:
        business_details: Description of the business. It is converted with
            ``str()`` and embedded to form the search query.

    Returns:
        A string listing the matching influencers in the order the index
        returned them, passed through ``clean_text`` and cut to at most
        1000 tokens, or ``"No influencers found."`` when no row passes the
        similarity threshold (or there are no rows to search).
    '''
    no_match_message = "No influencers found."
    similarity_threshold = 0.35
    max_response_tokens = 1000

    # Every row is a candidate; the threshold below does the actual filtering.
    top_k = len(df)
    if top_k == 0:
        # Nothing to search, and asking the index for k=0 results is not a
        # valid request, so answer the same way as "no match".
        return no_match_message

    # FAISS expects a 2-D float32 array of shape (n_queries, dim).
    query_embedding = (
        np.array(embedding_model.embed_query(str(business_details)))
        .reshape(1, -1)
        .astype('float32')
    )
    # In-place scaling of the query to unit length.
    # NOTE(review): the threshold below treats the returned scores as
    # similarities (higher is better), which presumes an inner-product index
    # built from normalized vectors - confirm against load_embeddings.
    faiss.normalize_L2(query_embedding)

    distances, indices = index.search(query_embedding, top_k)

    # FAISS pads slots it could not fill with label -1. Those must be dropped
    # explicitly: df.iloc[-1] would otherwise silently return the last row.
    selected = [
        (idx, sim)
        for idx, sim in zip(indices[0], distances[0])
        if idx >= 0 and sim >= similarity_threshold
    ]

    if not selected:
        return no_match_message

    # === Format results ===
    outer_list = []
    for rank, (idx, _sim) in enumerate(selected, 1):
        row = df.iloc[idx]
        outer_list.append([
            f"[{rank}]. The influencer name is: **{row['username']}** — Likes: **{row['likesCount']}**, Comments: **{row['commentCount']}**",
            f"The branding or promotion done is:\n{row['visible_texts_or_brandings']}",
            f"The details of product or service is:\n{row['product_or_service_details']}",
        ])

    cleaned_response = clean_text(str(outer_list))

    # Cap the response size by token count; the cut can land mid-entry.
    encoding = tiktoken.encoding_for_model('gpt-4o-mini')
    tokens = encoding.encode(cleaned_response)
    return encoding.decode(tokens[:max_response_tokens])