Spaces:
Sleeping
Sleeping
Commit ·
4564881
1
Parent(s): f4d0448
Make Chatbot Agentic
Browse files- src/api/main.py +28 -18
- src/data_ingestion/saving_data_to_postgres.py +0 -0
- src/llm/groq_client.py +33 -0
- src/llm/prompts.py +48 -34
- src/llm/tool_use_schema.json +0 -35
- src/retrieval/rag_pipeline.py +44 -7
src/api/main.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
from
|
|
|
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
from pydantic import BaseModel, Field
|
| 4 |
import uvicorn
|
|
@@ -6,12 +7,30 @@ from src.api.schemas import RecommendationRequest, RecommendationResponse
|
|
| 6 |
import time
|
| 7 |
from config import settings
|
| 8 |
import traceback
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
from src.retrieval.rag_pipeline import AnimeRAGPipeline
|
| 11 |
|
| 12 |
app = FastAPI(title="Anime Recommendation API",
|
| 13 |
description="RAG-powered anime recommendation system",
|
| 14 |
-
version="1.0.0"
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
app.add_middleware(
|
|
@@ -22,16 +41,6 @@ app.add_middleware(
|
|
| 22 |
allow_headers=["*"]
|
| 23 |
)
|
| 24 |
|
| 25 |
-
pipeline = None
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def get_pipeline():
|
| 29 |
-
"""Lazy initialization of pipeline"""
|
| 30 |
-
global pipeline
|
| 31 |
-
if pipeline is None:
|
| 32 |
-
pipeline = AnimeRAGPipeline(retriever_k=10)
|
| 33 |
-
return pipeline
|
| 34 |
-
|
| 35 |
|
| 36 |
@app.get("/")
|
| 37 |
async def root():
|
|
@@ -44,7 +53,7 @@ async def root():
|
|
| 44 |
|
| 45 |
|
| 46 |
@app.post("/recommend", response_model=RecommendationResponse)
|
| 47 |
-
async def get_recommendations(request: RecommendationRequest):
|
| 48 |
"""
|
| 49 |
Get anime recommendation based on user query
|
| 50 |
|
|
@@ -58,7 +67,7 @@ async def get_recommendations(request: RecommendationRequest):
|
|
| 58 |
```
|
| 59 |
"""
|
| 60 |
try:
|
| 61 |
-
rag_pipeline =
|
| 62 |
|
| 63 |
rag_pipeline.recommendation_n = request.n_results
|
| 64 |
|
|
@@ -76,7 +85,8 @@ async def get_recommendations(request: RecommendationRequest):
|
|
| 76 |
)
|
| 77 |
end_time = time.time()
|
| 78 |
|
| 79 |
-
print(f"Retrieved anime : \n{result["
|
|
|
|
| 80 |
print(f"Result Recommendations: \n{result["recommendations"][:20]}")
|
| 81 |
return RecommendationResponse(
|
| 82 |
query=result["query"],
|
|
@@ -96,9 +106,9 @@ async def get_recommendations(request: RecommendationRequest):
|
|
| 96 |
|
| 97 |
|
| 98 |
@app.get("/stats")
|
| 99 |
-
async def get_stats():
|
| 100 |
"""Get system statistics"""
|
| 101 |
-
rag_pipeline =
|
| 102 |
|
| 103 |
return {
|
| 104 |
"total_anime": rag_pipeline.retriever.collection.count(),
|
|
|
|
| 1 |
+
from src.retrieval.rag_pipeline import AnimeRAGPipeline
|
| 2 |
+
from fastapi import FastAPI, HTTPException, status, Request
|
| 3 |
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
from pydantic import BaseModel, Field
|
| 5 |
import uvicorn
|
|
|
|
| 7 |
import time
|
| 8 |
from config import settings
|
| 9 |
import traceback
|
| 10 |
+
import logging
|
| 11 |
+
from contextlib import asynccontextmanager
|
| 12 |
+
|
| 13 |
+
# basicConfig() configures the root logger and returns None, so its result
# must not be bound to a name; it is called purely for its side effect.
logging.basicConfig(level=logging.INFO)
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Handles startup and shutdown of the Anime RAG Pipeline.

    Replaces lazy global initialization with app state: a single
    AnimeRAGPipeline (retriever_k=10) is created before the application
    starts serving and stored on ``app.state.pipeline``, which the request
    handlers read via ``request.app.state.pipeline``.

    Args:
        app: The FastAPI application whose ``state`` receives the pipeline.

    Yields:
        None. Control returns to the framework while the app is serving.
    """
    # Use the module logger (configured above) instead of print so startup
    # and shutdown messages go through the same channel as other logs.
    logger.info("Initializing Anime RAG Pipeline...")
    app.state.pipeline = AnimeRAGPipeline(retriever_k=10)

    try:
        yield
    finally:
        # try/finally guarantees the shutdown step also runs when an
        # exception or cancellation is thrown in at the yield point.
        logger.info("Shutting down... Cleaning up resources.")
|
| 28 |
|
|
|
|
| 29 |
|
| 30 |
app = FastAPI(title="Anime Recommendation API",
|
| 31 |
description="RAG-powered anime recommendation system",
|
| 32 |
+
version="1.0.0",
|
| 33 |
+
lifespan=lifespan)
|
| 34 |
|
| 35 |
|
| 36 |
app.add_middleware(
|
|
|
|
| 41 |
allow_headers=["*"]
|
| 42 |
)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
@app.get("/")
|
| 46 |
async def root():
|
|
|
|
| 53 |
|
| 54 |
|
| 55 |
@app.post("/recommend", response_model=RecommendationResponse)
|
| 56 |
+
async def get_recommendations(request: RecommendationRequest, fastapi_req: Request):
|
| 57 |
"""
|
| 58 |
Get anime recommendation based on user query
|
| 59 |
|
|
|
|
| 67 |
```
|
| 68 |
"""
|
| 69 |
try:
|
| 70 |
+
rag_pipeline = fastapi_req.app.state.pipeline
|
| 71 |
|
| 72 |
rag_pipeline.recommendation_n = request.n_results
|
| 73 |
|
|
|
|
| 85 |
)
|
| 86 |
end_time = time.time()
|
| 87 |
|
| 88 |
+
# print(f"Retrieved anime : \n{result["retrieved_animes"][0]}")
|
| 89 |
+
print(f"Retrieved anime Count : \n{result["retrieved_count"]}")
|
| 90 |
print(f"Result Recommendations: \n{result["recommendations"][:20]}")
|
| 91 |
return RecommendationResponse(
|
| 92 |
query=result["query"],
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
@app.get("/stats")
|
| 109 |
+
async def get_stats(fastapi_req: Request):
|
| 110 |
"""Get system statistics"""
|
| 111 |
+
rag_pipeline = fastapi_req.app.state.pipeline
|
| 112 |
|
| 113 |
return {
|
| 114 |
"total_anime": rag_pipeline.retriever.collection.count(),
|
src/data_ingestion/saving_data_to_postgres.py
ADDED
|
File without changes
|
src/llm/groq_client.py
CHANGED
|
@@ -56,6 +56,39 @@ class GroqLLM:
|
|
| 56 |
logger.error(f"Groq API error: {e}")
|
| 57 |
return "Sorry, I encountered an error generating the response"
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
if __name__ == '__main__':
|
| 61 |
llm = GroqLLM()
|
|
|
|
| 56 |
logger.error(f"Groq API error: {e}")
|
| 57 |
return "Sorry, I encountered an error generating the response"
|
| 58 |
|
| 59 |
+
def chat_with_tools(
    self,
    messages: list,
    tools: list,
    system_prompt: str = "You are a helpful anime recommendation assistant.",
    temperature: float = 0  # Keep temperature very low for more reliable tool calling
):
    """
    Agentic generation that supports function/tool calling.

    Prepends ``system_prompt`` as a system message to ``messages`` and sends
    the conversation, together with the tool schemas, to the chat
    completions endpoint with ``tool_choice="auto"`` so the model may either
    call a tool or answer in plain text.

    Args:
        messages: Conversation messages (role/content dicts), without the
            system message. The caller's list is not modified.
        tools: Tool/function schemas offered to the model.
        system_prompt: Content of the system message placed first.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The full message object of the first choice (not just its content
        string), so callers can inspect ``message.tool_calls``. Returns
        ``None`` if the request fails for any reason.
    """
    try:
        # Built inside the try block so a malformed `messages` argument is
        # reported through the same "return None" path as an API failure.
        full_messages = [
            {"role": "system", "content": system_prompt},
            *messages,
        ]

        response = self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            tools=tools,
            tool_choice="auto",  # This tells the LLM it can choose to use a tool or just chat
            temperature=temperature,
            max_tokens=1024,
        )

        # Note: We return the actual message object, not just the content string.
        # This allows the RAG pipeline to check `if message.tool_calls:`
        return response.choices[0].message

    except Exception as e:
        # Deliberate best-effort boundary: callers treat None as "LLM
        # unavailable". logger.exception keeps the traceback for debugging.
        logger.exception("Groq API Tool Calling error: %s", e)
        return None
|
| 91 |
+
|
| 92 |
|
| 93 |
if __name__ == '__main__':
|
| 94 |
llm = GroqLLM()
|
src/llm/prompts.py
CHANGED
|
@@ -2,6 +2,38 @@
|
|
| 2 |
Prompt Templates for anime recommendation system
|
| 3 |
"""
|
| 4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
def create_recommendation_prompt(
|
| 7 |
user_query: str,
|
|
@@ -28,47 +60,29 @@ def create_recommendation_prompt(
|
|
| 28 |
context = "\n".join(context_parts)
|
| 29 |
|
| 30 |
prompt = f"""
|
| 31 |
-
You are an expert
|
| 32 |
-
First: If the user is asking for a comparison or opinion on specific anime,
|
| 33 |
-
provide thoughtful comparison rather than a list of recommendations.
|
| 34 |
|
| 35 |
-
|
| 36 |
-
Your task is to:
|
| 37 |
-
1. Analyze the user's request carefully, paying attention to specific preferences (tone, themes, etc.)
|
| 38 |
-
2. Evaluate each retrieved anime against their user's criteria
|
| 39 |
-
3. Select the {n_recommendations} BEST matches that truly fit what they're asking for
|
| 40 |
-
4. Explain why each recommendation fits their request. Answer in a way, that sounds really casual, super relaxed, and don't explain your "system thinking" such as "User has asked for ...., So here they are..." or any such variations.
|
| 41 |
|
| 42 |
-
|
| 43 |
-
"{user_query}"
|
| 44 |
-
|
| 45 |
-
Retrieved anime from semantic search:
|
| 46 |
{context}
|
| 47 |
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
- Be honest if none of the retrieved anime are great matches
|
| 56 |
-
|
| 57 |
-
Format your response as:
|
| 58 |
-
**Recommendation 1: [Anime Title]**
|
| 59 |
-
[1-2 sentence explanation of why it matches]
|
| 60 |
-
|
| 61 |
-
**Recommendation 2: [Anime Title]**
|
| 62 |
-
[1-2 sentence explanation]
|
| 63 |
-
|
| 64 |
-
[Continue for {n_recommendations} recommendations]
|
| 65 |
|
| 66 |
-
|
|
|
|
|
|
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
|
| 71 |
-
|
| 72 |
"""
|
| 73 |
|
| 74 |
return prompt
|
|
|
|
| 2 |
Prompt Templates for anime recommendation system
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
# Tool (function-calling) schema offered to the LLM by the RAG pipeline's
# routing call. It declares one function, `search_anime_database`, with a
# single required string argument, `optimized_query`, which the pipeline reads
# back from the tool call and uses as the vector-search query.
ANIME_SEARCH_TOOL = [
    {
        "type": "function",
        "function": {
            "name": "search_anime_database",
            "description": "Use this tool to search the vector database for anime recommendations based on user requests. Call this whenever the user asks for recommendations, similar shows, or specific genres.",
            "parameters": {
                "type": "object",
                "properties": {
                    "optimized_query": {
                        "type": "string",
                        "description": "The user's query optimized for semantic vector search. Remove conversational filler. (e.g., if user says 'hi tell me an anime like naruto', the optimized_query is 'anime similar to naruto action shounen ninja')"
                    }
                },
                "required": ["optimized_query"]
            }
        }
    }
]
|
| 24 |
+
|
| 25 |
+
ROUTER_SYSTEM_PROMPT = """
|
| 26 |
+
You are a strict Gatekeeper for an Anime Recommendation System. Your ONLY job is to categorize user input.
|
| 27 |
+
|
| 28 |
+
# RULES:
|
| 29 |
+
1. GREETINGS: If the user says "Hi", "Hello", etc., respond naturally in text. DO NOT CALL THE TOOL.
|
| 30 |
+
2. ANIME SEARCH: Only call 'search_anime_database' if the user provides SPECIFIC criteria (e.g., "dark anime", "shows like Naruto", "romance with high score").
|
| 31 |
+
3. VAGUE/GIBBERISH: If the user says "anime", "similar", "recommend something", or types gibberish/random characters, DO NOT CALL THE TOOL. Instead, respond in text asking for specific details or preferences.
|
| 32 |
+
4. VULGARITY/OFF-TOPIC: If the user is being vulgar or asking about non-anime topics (politics, math, etc.), respond politely stating you only talk about anime. DO NOT CALL THE TOOL.
|
| 33 |
+
|
| 34 |
+
# THRESHOLD:
|
| 35 |
+
If you are less than 90% sure what the user wants, DO NOT call the tool. Ask for clarification instead.
|
| 36 |
+
"""
|
| 37 |
|
| 38 |
def create_recommendation_prompt(
|
| 39 |
user_query: str,
|
|
|
|
| 60 |
context = "\n".join(context_parts)
|
| 61 |
|
| 62 |
prompt = f"""
|
| 63 |
+
You are an expert, polite, casual and friendly anime recommender. Your goal is to give personalized anime recommendations based strictly on the provided database context.
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
User's Query: "{user_query}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
+
Available Anime Context (from vector search):
|
|
|
|
|
|
|
|
|
|
| 68 |
{context}
|
| 69 |
|
| 70 |
+
# Core Directives:
|
| 71 |
+
1. **Curate, Don't List**: Do not just repeat the context. Analyze the user's specific vibe/theme requests and pick the top {n_recommendations} absolute best matches.
|
| 72 |
+
2. **THE SEQUEL RULE**: You MUST NOT recommend direct sequels, prequels, or movies of the exact anime the user mentioned, UNLESS they explicitly ask for a watch order or sequel. (e.g., If they ask for "shows like Bleach", do NOT recommend "Bleach: Thousand-Year Blood War").
|
| 73 |
+
3. **Conversational Tone**: Speak like a relaxed anime fan chatting with a friend. DO NOT use robotic transitions like "Based on the provided context..." or "Here are your recommendations...". Just jump right into the good stuff.
|
| 74 |
+
4. **The Pitch**: For each pick, write 1-2 sentences explaining exactly *why* it fits their specific request based on the synopsis and genres.
|
| 75 |
+
5. **No Hallucinations**: If an anime lacks a synopsis, do not invent one. Explain it using its genres and your general knowledge of its themes.
|
| 76 |
+
6. **Honesty**: If the retrieved context doesn't have any genuinely good matches for the user's query, be honest. Tell them the database didn't have a perfect fit and suggest what kind of show they should look for instead.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
+
# Required Format:
|
| 79 |
+
**[Anime Title]**
|
| 80 |
+
[Your 1-2 sentence pitch on why it fits the user's exact vibe]
|
| 81 |
|
| 82 |
+
**[Anime Title]**
|
| 83 |
+
[Your pitch...]
|
| 84 |
|
| 85 |
+
(Limit to {n_recommendations} recommendations maximum.)
|
| 86 |
"""
|
| 87 |
|
| 88 |
return prompt
|
src/llm/tool_use_schema.json
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"tools": [
|
| 3 |
-
{
|
| 4 |
-
"type": "function",
|
| 5 |
-
"function": {
|
| 6 |
-
"name": "get_weather",
|
| 7 |
-
"description": "Get current weather for a location",
|
| 8 |
-
"parameters": {
|
| 9 |
-
"type": "object",
|
| 10 |
-
"properties": {
|
| 11 |
-
"location": {
|
| 12 |
-
"type": "string",
|
| 13 |
-
"description": "City and state, e.g. San Francisco, CA"
|
| 14 |
-
},
|
| 15 |
-
"unit": {
|
| 16 |
-
"type": "string",
|
| 17 |
-
"enum": ["celsius", "fahrenheit"]
|
| 18 |
-
}
|
| 19 |
-
},
|
| 20 |
-
"required": ["location"]
|
| 21 |
-
}
|
| 22 |
-
}
|
| 23 |
-
}
|
| 24 |
-
],
|
| 25 |
-
"messages": [
|
| 26 |
-
{
|
| 27 |
-
"role": "system",
|
| 28 |
-
"content": "You are a weather assistant. Respond to the user question and use tools if needed to answer the query."
|
| 29 |
-
},
|
| 30 |
-
{
|
| 31 |
-
"role": "user",
|
| 32 |
-
"content": "What's the weather in San Francisco?"
|
| 33 |
-
}
|
| 34 |
-
],
|
| 35 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/retrieval/rag_pipeline.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
|
|
| 1 |
from src.retrieval.vector_search import AnimeRetriever
|
| 2 |
from src.llm.groq_client import GroqLLM
|
| 3 |
-
from src.llm.prompts import create_recommendation_prompt, create_system_prompt
|
| 4 |
import logging
|
| 5 |
from config import settings
|
| 6 |
|
|
@@ -54,30 +55,66 @@ class AnimeRAGPipeline:
|
|
| 54 |
- retrieved_animes: raw retrieval results (for debugging)
|
| 55 |
"""
|
| 56 |
logger.info(f"\n----Processing query: {user_query}-----\n")
|
| 57 |
-
|
| 58 |
-
logger.info(f"\n[1/3] Retrieving from vector database...")
|
| 59 |
filters = filters or {}
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
retrieved_animes = self.retriever.search(
|
| 62 |
-
query=
|
| 63 |
n_results=self.retriever_k,
|
| 64 |
**filters
|
| 65 |
)
|
| 66 |
|
| 67 |
-
|
|
|
|
| 68 |
prompt = create_recommendation_prompt(
|
| 69 |
user_query=user_query,
|
| 70 |
retrieved_animes=retrieved_animes,
|
| 71 |
n_recommendations=self.recommendation_n
|
| 72 |
)
|
| 73 |
|
| 74 |
-
logger.info(
|
| 75 |
system_prompt = create_system_prompt()
|
| 76 |
|
| 77 |
recommendations = self.llm.generate(
|
| 78 |
prompt=prompt,
|
| 79 |
system_prompt=system_prompt,
|
| 80 |
-
temperature=0.
|
| 81 |
max_tokens=1500
|
| 82 |
)
|
| 83 |
|
|
|
|
| 1 |
+
import json
|
| 2 |
from src.retrieval.vector_search import AnimeRetriever
|
| 3 |
from src.llm.groq_client import GroqLLM
|
| 4 |
+
from src.llm.prompts import create_recommendation_prompt, create_system_prompt, ANIME_SEARCH_TOOL, ROUTER_SYSTEM_PROMPT
|
| 5 |
import logging
|
| 6 |
from config import settings
|
| 7 |
|
|
|
|
| 55 |
- retrieved_animes: raw retrieval results (for debugging)
|
| 56 |
"""
|
| 57 |
logger.info(f"\n----Processing query: {user_query}-----\n")
|
|
|
|
|
|
|
| 58 |
filters = filters or {}
|
| 59 |
|
| 60 |
+
# [STEP 1] The Agentic Decision Call
|
| 61 |
+
logger.info("[1/4] Asking LLM if it needs to search...")
|
| 62 |
+
|
| 63 |
+
initial_response = self.llm.chat_with_tools(
|
| 64 |
+
messages=[{"role": "user", "content": user_query}],
|
| 65 |
+
tools=ANIME_SEARCH_TOOL,
|
| 66 |
+
system_prompt=ROUTER_SYSTEM_PROMPT
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
# [STEP 2] Check if the LLM decided to call the tool
|
| 70 |
+
if not initial_response:
|
| 71 |
+
logger.error("Groq API failed completely.")
|
| 72 |
+
return {
|
| 73 |
+
"query": user_query,
|
| 74 |
+
"recommendations": "Sorry, I'm having trouble processing your query. Can you be more clear and try again?",
|
| 75 |
+
"retrieved_count": 0,
|
| 76 |
+
"retrieved_animes": []
|
| 77 |
+
}
|
| 78 |
+
if not initial_response.tool_calls:
|
| 79 |
+
logger.info(
|
| 80 |
+
"[2/4] No search needed. Returning conversational response.")
|
| 81 |
+
return {
|
| 82 |
+
"query": user_query,
|
| 83 |
+
"retrieved_count": 0,
|
| 84 |
+
"recommendations": initial_response.content,
|
| 85 |
+
"retrieved_animes": []
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
# [STEP 3] The LLM wants to search. Extract its optimized parameters.
|
| 89 |
+
logger.info("[2/4] Tool called! Executing vector search...")
|
| 90 |
+
tool_call = initial_response.tool_calls[0]
|
| 91 |
+
tool_args = json.loads(tool_call.function.arguments)
|
| 92 |
+
|
| 93 |
+
logger.info(f"Tool called: [{tool_call}] with args: [{tool_args}]\n")
|
| 94 |
+
|
| 95 |
+
optimized_query = tool_args.get("optimized_query", user_query)
|
| 96 |
+
|
| 97 |
retrieved_animes = self.retriever.search(
|
| 98 |
+
query=optimized_query,
|
| 99 |
n_results=self.retriever_k,
|
| 100 |
**filters
|
| 101 |
)
|
| 102 |
|
| 103 |
+
# [STEP 4] The Final Recommendation Call
|
| 104 |
+
logger.info("[3/4] Creating prompt with retrieved content...")
|
| 105 |
prompt = create_recommendation_prompt(
|
| 106 |
user_query=user_query,
|
| 107 |
retrieved_animes=retrieved_animes,
|
| 108 |
n_recommendations=self.recommendation_n
|
| 109 |
)
|
| 110 |
|
| 111 |
+
logger.info("[4/4] LLM generating final response...")
|
| 112 |
system_prompt = create_system_prompt()
|
| 113 |
|
| 114 |
recommendations = self.llm.generate(
|
| 115 |
prompt=prompt,
|
| 116 |
system_prompt=system_prompt,
|
| 117 |
+
temperature=0.5,
|
| 118 |
max_tokens=1500
|
| 119 |
)
|
| 120 |
|