Spaces:
Sleeping
Sleeping
Commit ·
acfddab
1
Parent(s): 247193e
integrated image description
Browse files- api/routers/analytics_chatbot.py +34 -21
- api/schemas/analytics_chatbot.py +4 -2
- logs/access.log +0 -0
- src/genai/analytics_chatbot/handlers/__init__.py +15 -0
- src/genai/analytics_chatbot/handlers/analytics_description.py +31 -0
- src/genai/analytics_chatbot/handlers/comment_quality.py +1 -0
- src/genai/analytics_chatbot/utils/name_variations.json +7 -1
- src/genai/analytics_chatbot/utils/nodes.py +24 -26
- src/genai/analytics_chatbot/utils/prompts.py +10 -1
api/routers/analytics_chatbot.py
CHANGED
|
@@ -2,15 +2,19 @@ import ast
|
|
| 2 |
import os
|
| 3 |
import json
|
| 4 |
import hashlib
|
|
|
|
| 5 |
import logging
|
| 6 |
from fastapi import APIRouter
|
|
|
|
| 7 |
from redis import Redis
|
| 8 |
from fastapi.responses import StreamingResponse
|
| 9 |
from api.stored_data import stored_data
|
| 10 |
from src.genai.analytics_chatbot.agent import ChatbotAgent
|
| 11 |
from src.genai.analytics_chatbot.utils.utils import process_query
|
| 12 |
-
from
|
|
|
|
| 13 |
from config.redis_config import redis_client
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
app_logger = logging.getLogger("app_logger")
|
|
@@ -21,29 +25,38 @@ router = APIRouter()
|
|
| 21 |
agent=ChatbotAgent()
|
| 22 |
graph = agent.chatbot_graph()
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
|
|
|
| 28 |
|
|
|
|
| 29 |
cached_response = redis_client.get(cache_key)
|
| 30 |
-
|
| 31 |
-
print("Cache hit")
|
| 32 |
-
return json.loads(cached_response)
|
| 33 |
-
|
| 34 |
-
config={"configurable": {"thread_id": "analytics-chatbot-thread"},"run_name":"analytics-chatbot"}
|
| 35 |
-
result=graph.invoke({'messages':msg},config=config)
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
if result.get('backup_data') is not None:
|
| 40 |
-
response_to_cache = {'backup_response': result['backup_data']}
|
| 41 |
else:
|
| 42 |
-
response_to_cache = {
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
}
|
|
|
|
|
|
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
return response_to_cache
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import os
|
| 3 |
import json
|
| 4 |
import hashlib
|
| 5 |
+
import base64
|
| 6 |
import logging
|
| 7 |
from fastapi import APIRouter
|
| 8 |
+
from typing import Optional
|
| 9 |
from redis import Redis
|
| 10 |
from fastapi.responses import StreamingResponse
|
| 11 |
from api.stored_data import stored_data
|
| 12 |
from src.genai.analytics_chatbot.agent import ChatbotAgent
|
| 13 |
from src.genai.analytics_chatbot.utils.utils import process_query
|
| 14 |
+
from src.genai.analytics_chatbot.handlers.analytics_description import generate_analytics_description
|
| 15 |
+
from api.schemas.analytics_chatbot import AnalyticsChatRequest
|
| 16 |
from config.redis_config import redis_client
|
| 17 |
+
from openai import OpenAI
|
| 18 |
|
| 19 |
|
| 20 |
app_logger = logging.getLogger("app_logger")
|
|
|
|
| 25 |
agent=ChatbotAgent()
|
| 26 |
graph = agent.chatbot_graph()
|
| 27 |
|
| 28 |
+
|
| 29 |
+
@router.post("/analytics-chatbot")
def get_analytics(request: AnalyticsChatRequest):
    """Answer an analytics question and optionally describe an attached image.

    The processed query is hashed into a Redis key. A cached payload is reused
    when it already holds both ``response`` and ``endpoint``; otherwise the
    chatbot graph is invoked and its result is merged into the payload.

    When the request carries ``image_base64``, a description is generated by
    ``generate_analytics_description`` and cached together with the MD5 digest
    of the image it was generated from. The description is regenerated whenever
    the digest of the incoming image differs from the stored one, so the same
    question asked about a different image never returns a stale description.

    Args:
        request: Body with the user message ``msg`` and an optional
            ``image_base64`` string.

    Returns:
        dict: Either ``{'response', 'endpoint'}`` or ``{'backup_response'}``,
        plus ``'description'`` when one is available.
    """
    # Internal bookkeeping field stored in Redis only; never returned to callers.
    digest_field = 'description_image_md5'

    user_query = process_query(request.msg)
    app_logger.info("Processed user query: %s", user_query)

    cache_key = f"analytics:{hashlib.md5(user_query.encode()).hexdigest()}"
    cached_response = redis_client.get(cache_key)
    app_logger.debug("Cache %s for key %s",
                     "hit" if cached_response else "miss", cache_key)

    response_to_cache = json.loads(cached_response) if cached_response else {}

    if not response_to_cache.get('response') or not response_to_cache.get('endpoint'):
        config = {"configurable": {"thread_id": "analytics-chatbot-thread"},
                  "run_name": "analytics-chatbot"}
        result = graph.invoke({'messages': user_query}, config=config)

        if result.get('backup_data') is not None:
            response_to_cache['backup_response'] = result['backup_data']
        else:
            response_to_cache['response'] = result['response']
            response_to_cache['endpoint'] = result['endpoint']

    if request.image_base64:
        image_digest = hashlib.md5(request.image_base64.encode()).hexdigest()
        # Entries written before the digest was tracked have no digest field
        # and are therefore regenerated once for the incoming image.
        is_stale = response_to_cache.get(digest_field) != image_digest
        if is_stale or not response_to_cache.get('description'):
            response_to_cache['description'] = generate_analytics_description(
                user_query, request.image_base64)
            response_to_cache[digest_field] = image_digest

    # Written on every request, which also refreshes the 3000-second expiry.
    redis_client.set(cache_key, json.dumps(response_to_cache), ex=3000)

    return {key: value for key, value in response_to_cache.items()
            if key != digest_field}
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
|
api/schemas/analytics_chatbot.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
| 1 |
from pydantic import BaseModel
|
|
|
|
| 2 |
|
| 3 |
-
class
|
| 4 |
-
|
|
|
|
|
|
| 1 |
from pydantic import BaseModel
|
| 2 |
+
from typing import Optional
|
| 3 |
|
| 4 |
+
class AnalyticsChatRequest(BaseModel):
    """Request body accepted by the analytics chatbot endpoint."""

    # The user's message text.
    msg: str
    # Optional image payload as a string; omitted requests default to None.
    image_base64: Optional[str] = None
|
logs/access.log
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
src/genai/analytics_chatbot/handlers/__init__.py
CHANGED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .compare import compare
|
| 2 |
+
from .posting_time import get_posting_time
|
| 3 |
+
from .peak_comment_hour import get_peak_comment_hour
|
| 4 |
+
from .emoji_count import get_emoji_count
|
| 5 |
+
from .comment_quality import get_comment_quality
|
| 6 |
+
from .bot_and_diversity import get_bot_and_diversity
|
| 7 |
+
|
| 8 |
+
__all__ = [
|
| 9 |
+
"compare",
|
| 10 |
+
"get_posting_time",
|
| 11 |
+
"get_peak_comment_hour",
|
| 12 |
+
"get_emoji_count",
|
| 13 |
+
"get_comment_quality",
|
| 14 |
+
"get_bot_and_diversity",
|
| 15 |
+
]
|
src/genai/analytics_chatbot/handlers/analytics_description.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
import os
|
| 4 |
+
from ..utils.prompts import analytics_description_prompt
|
| 5 |
+
client = OpenAI(
|
| 6 |
+
base_url="https://router.huggingface.co/v1",
|
| 7 |
+
api_key=os.environ['HUGGINGFACEHUB_ACCESS_TOKEN'],)
|
| 8 |
+
|
| 9 |
+
def _image_data_url(image_base64):
    """Return a ``data:`` URL for a base64 image payload.

    A payload that is already a data URL is returned unchanged. Otherwise the
    MIME type is picked from the base64-encoded file signature, falling back
    to ``image/jpeg`` when the signature is not recognised.
    """
    # Raw base64 never contains ':', so this only matches real data URLs.
    if image_base64.startswith("data:"):
        return image_base64

    # Base64 renderings of the leading magic bytes of each format.
    signatures = (
        ("iVBORw0KGgo", "image/png"),   # \x89PNG\r\n\x1a\n
        ("R0lGOD", "image/gif"),        # GIF87a / GIF89a
        ("UklGR", "image/webp"),        # RIFF container (WebP in an image field)
        ("/9j/", "image/jpeg"),         # \xff\xd8\xff
    )
    mime_type = "image/jpeg"
    for prefix, candidate in signatures:
        if image_base64.startswith(prefix):
            mime_type = candidate
            break
    return f"data:{mime_type};base64,{image_base64}"


def generate_analytics_description(query,image_base64):
    """Ask the vision model to answer ``query`` from the supplied image.

    Args:
        query: The user query, embedded into the prompt built by
            ``analytics_description_prompt``.
        image_base64: The image as a base64 string, or a complete ``data:`` URL.

    Returns:
        The ``content`` of the first choice's message in the completion.
    """
    user_message = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": analytics_description_prompt(query),
            },
            {
                "type": "image_url",
                "image_url": {
                    "url": _image_data_url(image_base64),
                },
            },
        ],
    }
    completion = client.chat.completions.create(
        model="zai-org/GLM-4.5V:novita",
        messages=[user_message],
    )

    return completion.choices[0].message.content
|
src/genai/analytics_chatbot/handlers/comment_quality.py
CHANGED
|
@@ -13,5 +13,6 @@ def get_comment_quality(state,llm_gpt,url):
|
|
| 13 |
parameters=llm_gpt.with_structured_output(CommentQualityFormatter , method='function_calling').invoke(messages)
|
| 14 |
print(parameters)
|
| 15 |
response = requests.get(url, params={ 'start_date': parameters.start_date , 'end_date':parameters.end_date,'influencer_username':process_query(parameters.influencer_name)})
|
|
|
|
| 16 |
return response
|
| 17 |
|
|
|
|
| 13 |
parameters=llm_gpt.with_structured_output(CommentQualityFormatter , method='function_calling').invoke(messages)
|
| 14 |
print(parameters)
|
| 15 |
response = requests.get(url, params={ 'start_date': parameters.start_date , 'end_date':parameters.end_date,'influencer_username':process_query(parameters.influencer_name)})
|
| 16 |
+
print(response)
|
| 17 |
return response
|
| 18 |
|
src/genai/analytics_chatbot/utils/name_variations.json
CHANGED
|
@@ -1155,5 +1155,11 @@
|
|
| 1155 |
"tashyil ha g",
|
| 1156 |
"tashyil",
|
| 1157 |
"tashyil g"
|
| 1158 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1159 |
}
|
|
|
|
| 1155 |
"tashyil ha g",
|
| 1156 |
"tashyil",
|
| 1157 |
"tashyil g"
|
| 1158 |
+
],
|
| 1159 |
+
"good sentiment":[
|
| 1160 |
+
"good"
|
| 1161 |
+
],
|
| 1162 |
+
"bad sentiment": [
|
| 1163 |
+
"bad"
|
| 1164 |
+
]
|
| 1165 |
}
|
src/genai/analytics_chatbot/utils/nodes.py
CHANGED
|
@@ -9,12 +9,14 @@ from src.genai.utils.models_loader import llm_gpt
|
|
| 9 |
import numpy as np
|
| 10 |
from src.genai.utils.data_loader import api_knowledge_df, api_index, caption_df , caption_index
|
| 11 |
from src.genai.utils.models_loader import embedding_model
|
| 12 |
-
from ..handlers
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
class FetchLastMessage:
|
|
@@ -39,12 +41,11 @@ class RetrievePossibleEndpoints:
|
|
| 39 |
self.df = api_knowledge_df
|
| 40 |
self.index = api_index
|
| 41 |
self.results = []
|
| 42 |
-
# self.results = ['/api/v1/compare/', '/api/v1/engagement/basic-metrics', '/api/v1/content/hashtags-analysis', '/api/v1/audience/emoji-count', '/api/v1/engagement/temporal_analysis']
|
| 43 |
|
| 44 |
def run(self,state:State):
|
| 45 |
print('Gone to retrieve possible endpoints')
|
| 46 |
query_embedding = np.array(embedding_model.embed_query(state['latest_message'])).reshape(1, -1).astype('float32')
|
| 47 |
-
distances, indices = self.index.search(query_embedding,
|
| 48 |
for idx in indices[0]:
|
| 49 |
row = self.df.iloc[idx]
|
| 50 |
print('Endpoint:',row['endpoint'])
|
|
@@ -135,6 +136,15 @@ class FetchDataNode:
|
|
| 135 |
"Authorization": "Bearer YOUR_API_KEY", # replace with your API key if needed
|
| 136 |
"Content-Type": "application/json"
|
| 137 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
def run(self, state:State):
|
| 140 |
try:
|
|
@@ -142,26 +152,14 @@ class FetchDataNode:
|
|
| 142 |
print('Entered to fetch data')
|
| 143 |
url = f'''{self.base_url}{state['endpoint']}'''
|
| 144 |
|
| 145 |
-
if state['endpoint']
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
response = get_posting_time(state, llm_gpt,url)
|
| 151 |
-
return {'response': response.json()}
|
| 152 |
-
|
| 153 |
-
elif state['endpoint']=='/api/v1/audience/peak-comment-hour':
|
| 154 |
-
response = get_peak_comment_hour(state,llm_gpt,url)
|
| 155 |
-
return {'response':response.json()}
|
| 156 |
-
|
| 157 |
-
elif state['endpoint']== '/api/v1/audience/emoji-count':
|
| 158 |
-
response = get_emoji_count(state,llm_gpt,url)
|
| 159 |
-
return {'response':response.json()}
|
| 160 |
-
|
| 161 |
-
elif state['endpoint']== '/api/v1/audience/comment-quality':
|
| 162 |
-
response = get_comment_quality(state,llm_gpt,url)
|
| 163 |
return {'response':response.json()}
|
| 164 |
|
|
|
|
| 165 |
elif 'single_influencer_query' in state['query_type']:
|
| 166 |
response = requests.get(url, params=state['parameters_values'],headers=self.headers)
|
| 167 |
print('Data from api:', response)
|
|
|
|
| 9 |
import numpy as np
|
| 10 |
from src.genai.utils.data_loader import api_knowledge_df, api_index, caption_df , caption_index
|
| 11 |
from src.genai.utils.models_loader import embedding_model
|
| 12 |
+
from ..handlers import (
|
| 13 |
+
compare,
|
| 14 |
+
get_posting_time,
|
| 15 |
+
get_peak_comment_hour,
|
| 16 |
+
get_emoji_count,
|
| 17 |
+
get_comment_quality,
|
| 18 |
+
get_bot_and_diversity,
|
| 19 |
+
)
|
| 20 |
|
| 21 |
|
| 22 |
class FetchLastMessage:
|
|
|
|
| 41 |
self.df = api_knowledge_df
|
| 42 |
self.index = api_index
|
| 43 |
self.results = []
|
|
|
|
| 44 |
|
| 45 |
def run(self,state:State):
|
| 46 |
print('Gone to retrieve possible endpoints')
|
| 47 |
query_embedding = np.array(embedding_model.embed_query(state['latest_message'])).reshape(1, -1).astype('float32')
|
| 48 |
+
distances, indices = self.index.search(query_embedding,10)
|
| 49 |
for idx in indices[0]:
|
| 50 |
row = self.df.iloc[idx]
|
| 51 |
print('Endpoint:',row['endpoint'])
|
|
|
|
| 136 |
"Authorization": "Bearer YOUR_API_KEY", # replace with your API key if needed
|
| 137 |
"Content-Type": "application/json"
|
| 138 |
}
|
| 139 |
+
|
| 140 |
+
self.endpoint_handlers = {
|
| 141 |
+
'/api/v1/compare/': compare,
|
| 142 |
+
'/api/v1/engagement/posting-time-analysis': get_posting_time,
|
| 143 |
+
'/api/v1/audience/peak-comment-hour': get_peak_comment_hour,
|
| 144 |
+
'/api/v1/audience/emoji-count': get_emoji_count,
|
| 145 |
+
'/api/v1/audience/comment-quality': get_comment_quality,
|
| 146 |
+
'/api/v1/audience/bot-and-diversity': get_bot_and_diversity
|
| 147 |
+
}
|
| 148 |
|
| 149 |
def run(self, state:State):
|
| 150 |
try:
|
|
|
|
| 152 |
print('Entered to fetch data')
|
| 153 |
url = f'''{self.base_url}{state['endpoint']}'''
|
| 154 |
|
| 155 |
+
if state['endpoint'] in self.endpoint_handlers:
|
| 156 |
+
print('Entered to handler.')
|
| 157 |
+
handler = self.endpoint_handlers[state['endpoint']]
|
| 158 |
+
response = handler(state, llm_gpt, url)
|
| 159 |
+
print('Returned by handler.')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
return {'response':response.json()}
|
| 161 |
|
| 162 |
+
|
| 163 |
elif 'single_influencer_query' in state['query_type']:
|
| 164 |
response = requests.get(url, params=state['parameters_values'],headers=self.headers)
|
| 165 |
print('Data from api:', response)
|
src/genai/analytics_chatbot/utils/prompts.py
CHANGED
|
@@ -25,7 +25,9 @@ output:
|
|
| 25 |
'''
|
| 26 |
|
| 27 |
fetch_last_message_prompt = '''
|
| 28 |
-
You are an AI assistant that reads an entire conversation between a human and an AI.
|
|
|
|
|
|
|
| 29 |
|
| 30 |
- Carefully consider all previous human messages to understand context.
|
| 31 |
- Focus on the latest goal, request, or intention, even if it is expressed briefly or implicitly.
|
|
@@ -108,4 +110,11 @@ You are perfect parameters extractor for analysis of bot and comment diversity o
|
|
| 108 |
Given a user query and a list of needed parameters, return a Python dictionary assigning the best value for each parameter.
|
| 109 |
You have to return a dictionary containing influencer_name , number of commentors (top_n), start_date and end_date from the user query.
|
| 110 |
If there is no any specific mention of dates, you can return None for dates. In the case of number of commentors, return a default value of 10 if the number is not passed from the user.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
'''
|
|
|
|
| 25 |
'''
|
| 26 |
|
| 27 |
fetch_last_message_prompt = '''
|
| 28 |
+
You are an AI assistant that reads an entire conversation between a human and an AI.
|
| 29 |
+
The human is trying to ask something about the influencers.
|
| 30 |
+
Your task is to detect the human's most recent intention, taking into account the full conversation history.
|
| 31 |
|
| 32 |
- Carefully consider all previous human messages to understand context.
|
| 33 |
- Focus on the latest goal, request, or intention, even if it is expressed briefly or implicitly.
|
|
|
|
| 110 |
Given a user query and a list of needed parameters, return a Python dictionary assigning the best value for each parameter.
|
| 111 |
You have to return a dictionary containing influencer_name , number of commentors (top_n), start_date and end_date from the user query.
|
| 112 |
If there is no any specific mention of dates, you can return None for dates. In the case of number of commentors, return a default value of 10 if the number is not passed from the user.
|
| 113 |
+
'''
|
| 114 |
+
|
| 115 |
+
def analytics_description_prompt(query):
    """Build the prompt that asks the model to answer ``query`` from an image.

    Args:
        query: The user query to embed at the end of the prompt.

    Returns:
        str: Fixed instructions followed by the user query on its own line.
    """
    instructions = (
        "\n"
        "You are provided with the user query and the image.\n"
        "Give clear information to the user for their query only by analyzing the image. "
        "Don't give any responses outside of the image's context.\n"
    )
    query_section = f"The user query is: \n{query}\n\n"
    return instructions + query_section
|