Pushkar02-n committed on
Commit
4564881
·
1 Parent(s): f4d0448

Make Chatbot Agentic

Browse files
src/api/main.py CHANGED
@@ -1,4 +1,5 @@
1
- from fastapi import FastAPI, HTTPException, status
 
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from pydantic import BaseModel, Field
4
  import uvicorn
@@ -6,12 +7,30 @@ from src.api.schemas import RecommendationRequest, RecommendationResponse
6
  import time
7
  from config import settings
8
  import traceback
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- from src.retrieval.rag_pipeline import AnimeRAGPipeline
11
 
12
  app = FastAPI(title="Anime Recommendation API",
13
  description="RAG-powered anime recommendation system",
14
- version="1.0.0")
 
15
 
16
 
17
  app.add_middleware(
@@ -22,16 +41,6 @@ app.add_middleware(
22
  allow_headers=["*"]
23
  )
24
 
25
- pipeline = None
26
-
27
-
28
- def get_pipeline():
29
- """Lazy initialization of pipeline"""
30
- global pipeline
31
- if pipeline is None:
32
- pipeline = AnimeRAGPipeline(retriever_k=10)
33
- return pipeline
34
-
35
 
36
  @app.get("/")
37
  async def root():
@@ -44,7 +53,7 @@ async def root():
44
 
45
 
46
  @app.post("/recommend", response_model=RecommendationResponse)
47
- async def get_recommendations(request: RecommendationRequest):
48
  """
49
  Get anime recommendation based on user query
50
 
@@ -58,7 +67,7 @@ async def get_recommendations(request: RecommendationRequest):
58
  ```
59
  """
60
  try:
61
- rag_pipeline = get_pipeline()
62
 
63
  rag_pipeline.recommendation_n = request.n_results
64
 
@@ -76,7 +85,8 @@ async def get_recommendations(request: RecommendationRequest):
76
  )
77
  end_time = time.time()
78
 
79
- print(f"Retrieved anime : \n{result["retrieved_count"]}")
 
80
  print(f"Result Recommendations: \n{result["recommendations"][:20]}")
81
  return RecommendationResponse(
82
  query=result["query"],
@@ -96,9 +106,9 @@ async def get_recommendations(request: RecommendationRequest):
96
 
97
 
98
  @app.get("/stats")
99
- async def get_stats():
100
  """Get system statistics"""
101
- rag_pipeline = get_pipeline()
102
 
103
  return {
104
  "total_anime": rag_pipeline.retriever.collection.count(),
 
1
+ from src.retrieval.rag_pipeline import AnimeRAGPipeline
2
+ from fastapi import FastAPI, HTTPException, status, Request
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel, Field
5
  import uvicorn
 
7
  import time
8
  from config import settings
9
  import traceback
10
+ import logging
11
+ from contextlib import asynccontextmanager
12
+
13
# Configure root logging once at import time. Note: basicConfig() returns
# None, so its result must NOT be assigned to `logger` — the module logger
# comes from getLogger() below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
15
+
16
+
17
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Handle startup and shutdown of the Anime RAG Pipeline.

    Replaces lazy global initialization with app state: the pipeline is
    built once at startup and stored on ``app.state.pipeline`` so request
    handlers can reach it via ``request.app.state.pipeline``.
    """
    # Route startup/shutdown messages through the configured module logger
    # instead of bare print() so they respect the logging configuration.
    logger.info("Initializing Anime RAG Pipeline...")
    app.state.pipeline = AnimeRAGPipeline(retriever_k=10)

    yield
    logger.info("Shutting down... Cleaning up resources.")
28
 
 
29
 
30
  app = FastAPI(title="Anime Recommendation API",
31
  description="RAG-powered anime recommendation system",
32
+ version="1.0.0",
33
+ lifespan=lifespan)
34
 
35
 
36
  app.add_middleware(
 
41
  allow_headers=["*"]
42
  )
43
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  @app.get("/")
46
  async def root():
 
53
 
54
 
55
  @app.post("/recommend", response_model=RecommendationResponse)
56
+ async def get_recommendations(request: RecommendationRequest, fastapi_req: Request):
57
  """
58
  Get anime recommendation based on user query
59
 
 
67
  ```
68
  """
69
  try:
70
+ rag_pipeline = fastapi_req.app.state.pipeline
71
 
72
  rag_pipeline.recommendation_n = request.n_results
73
 
 
85
  )
86
  end_time = time.time()
87
 
88
+ # print(f"Retrieved anime : \n{result["retrieved_animes"][0]}")
89
+ print(f"Retrieved anime Count : \n{result["retrieved_count"]}")
90
  print(f"Result Recommendations: \n{result["recommendations"][:20]}")
91
  return RecommendationResponse(
92
  query=result["query"],
 
106
 
107
 
108
  @app.get("/stats")
109
+ async def get_stats(fastapi_req: Request):
110
  """Get system statistics"""
111
+ rag_pipeline = fastapi_req.app.state.pipeline
112
 
113
  return {
114
  "total_anime": rag_pipeline.retriever.collection.count(),
src/data_ingestion/saving_data_to_postgres.py ADDED
File without changes
src/llm/groq_client.py CHANGED
@@ -56,6 +56,39 @@ class GroqLLM:
56
  logger.error(f"Groq API error: {e}")
57
  return "Sorry, I encountered an error generating the response"
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  if __name__ == '__main__':
61
  llm = GroqLLM()
 
56
  logger.error(f"Groq API error: {e}")
57
  return "Sorry, I encountered an error generating the response"
58
 
59
def chat_with_tools(
    self,
    messages: list,
    tools: list,
    system_prompt: str = "You are a helpful anime recommendation assistant.",
    temperature: float = 0  # Keep temperature very low for more reliable tool calling
):
    """
    Agentic generation that supports function/tool calling.

    Args:
        messages: Conversation history as a list of chat-message dicts.
        tools: Tool/function schemas the model is allowed to call.
        system_prompt: System instruction prepended to the conversation.
        temperature: Sampling temperature; 0 keeps tool selection deterministic.

    Returns:
        The full message object (so callers can inspect ``message.tool_calls``),
        or ``None`` if the API call failed.
    """
    try:
        full_messages = [
            {"role": "system", "content": system_prompt}] + messages

        response = self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            tools=tools,
            tool_choice="auto",  # This tells the LLM it can choose to use a tool or just chat
            temperature=temperature,
            max_tokens=1024
        )

        # Note: We return the actual message object, not just the content string.
        # This allows the RAG pipeline to check `if message.tool_calls:`

        return response.choices[0].message

    except Exception as e:
        # logger.exception (not logger.error) records the full traceback,
        # making transient API failures debuggable from the logs.
        logger.exception(f"Groq API Tool Calling error: {e}")
        return None
91
+
92
 
93
  if __name__ == '__main__':
94
  llm = GroqLLM()
src/llm/prompts.py CHANGED
@@ -2,6 +2,38 @@
2
  Prompt Templates for anime recommendation system
3
  """
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def create_recommendation_prompt(
7
  user_query: str,
@@ -28,47 +60,29 @@ def create_recommendation_prompt(
28
  context = "\n".join(context_parts)
29
 
30
  prompt = f"""
31
- You are an expert anime recommendation assistant. A user has asked for recommendations, and you are provided in context relevant similar anime from the database.
32
- First: If the user is asking for a comparison or opinion on specific anime,
33
- provide thoughtful comparison rather than a list of recommendations.
34
 
35
- ## 1. If recommendations, only then:
36
- Your task is to:
37
- 1. Analyze the user's request carefully, paying attention to specific preferences (tone, themes, etc.)
38
- 2. Evaluate each retrieved anime against their user's criteria
39
- 3. Select the {n_recommendations} BEST matches that truly fit what they're asking for
40
- 4. Explain why each recommendation fits their request. Answer in a way, that sounds really casual, super relaxed, and don't explain your "system thinking" such as "User has asked for ...., So here they are..." or any such variations.
41
 
42
- User's Query:
43
- "{user_query}"
44
-
45
- Retrieved anime from semantic search:
46
  {context}
47
 
48
- Instructions:
49
- - If the user mentioned specific preferences (e.g., "lighter", "darker", "more action"), prioritize those
50
- - Don't just list all retrieved anime - SELECT the best {n_recommendations} that truly match
51
- - For each recommendation, explain in 1-2 sentences WHY it matches their request
52
- - If some retrieved anime DON'T match the user's specific criteria, exclude them
53
- - If some retrieved anime are the sequel (can be identified by name or other information) do not mention them, UNLESS user specifically asks.
54
- -> If user SPEICIFICALLY Ask about some sequel, and there's no synopsis, tell the User something similar to Synopsis of prequel(that might be in context) and additionally answer based on other featurs like genre, demographics, etc. DO NOT INVENT SYNOPSIS !!!
55
- - Be honest if none of the retrieved anime are great matches
56
-
57
- Format your response as:
58
- **Recommendation 1: [Anime Title]**
59
- [1-2 sentence explanation of why it matches]
60
-
61
- **Recommendation 2: [Anime Title]**
62
- [1-2 sentence explanation]
63
-
64
- [Continue for {n_recommendations} recommendations]
65
 
66
- If you think the retrieved anime don't match the request well, say so and explain what type of anime would be better.
 
 
67
 
68
- ## 2. If the user wants to talk casually about some anime they recently watched, engage into meaningful conversation with them, and ONLY when it seems user wants recommendations, give recommendation like mentioned in ##1 above.
69
- ## 3. FOR ANY OTHER Gibberish queries, not matching to the "Anime world" except some occasional GREETINGS(in which case you have to greet properly), just give a generic message that "You are not capable of answering that, politely".
70
 
71
- **Response Style**: Friendly, Casual, and do not mention the programmer(or anything similar to what/how you process/look for data). Just a Intelligent Bot answering to a user.
72
  """
73
 
74
  return prompt
 
2
  Prompt Templates for anime recommendation system
3
  """
4
 
5
# Tool/function schema (OpenAI-compatible format) advertised to the Groq
# chat-completions endpoint. The model may invoke `search_anime_database`
# with a single `optimized_query` argument rewritten for vector search.
ANIME_SEARCH_TOOL = [
    {
        "type": "function",
        "function": {
            "name": "search_anime_database",
            "description": "Use this tool to search the vector database for anime recommendations based on user requests. Call this whenever the user asks for recommendations, similar shows, or specific genres.",
            "parameters": {
                "type": "object",
                "properties": {
                    # The model is instructed to strip conversational filler
                    # before handing the query to semantic search.
                    "optimized_query": {
                        "type": "string",
                        "description": "The user's query optimized for semantic vector search. Remove conversational filler. (e.g., if user says 'hi tell me an anime like naruto', the optimized_query is 'anime similar to naruto action shounen ninja')"
                    }
                },
                "required": ["optimized_query"]
            }
        }
    }
]
24
+
25
# System prompt for the agentic routing step: the model either answers
# conversationally (greetings, vague input, off-topic) or calls the
# search tool — never both, and only when the request is specific.
ROUTER_SYSTEM_PROMPT = """
You are a strict Gatekeeper for an Anime Recommendation System. Your ONLY job is to categorize user input.

# RULES:
1. GREETINGS: If the user says "Hi", "Hello", etc., respond naturally in text. DO NOT CALL THE TOOL.
2. ANIME SEARCH: Only call 'search_anime_database' if the user provides SPECIFIC criteria (e.g., "dark anime", "shows like Naruto", "romance with high score").
3. VAGUE/GIBBERISH: If the user says "anime", "similar", "recommend something", or types gibberish/random characters, DO NOT CALL THE TOOL. Instead, respond in text asking for specific details or preferences.
4. VULGARITY/OFF-TOPIC: If the user is being vulgar or asking about non-anime topics (politics, math, etc.), respond politely stating you only talk about anime. DO NOT CALL THE TOOL.

# THRESHOLD:
If you are less than 90% sure what the user wants, DO NOT call the tool. Ask for clarification instead.
"""
37
 
38
  def create_recommendation_prompt(
39
  user_query: str,
 
60
  context = "\n".join(context_parts)
61
 
62
  prompt = f"""
63
+ You are an expert, polite, casual and friendly anime recommender. Your goal is to give personalized anime recommendations based strictly on the provided database context.
 
 
64
 
65
+ User's Query: "{user_query}"
 
 
 
 
 
66
 
67
+ Available Anime Context (from vector search):
 
 
 
68
  {context}
69
 
70
+ # Core Directives:
71
+ 1. **Curate, Don't List**: Do not just repeat the context. Analyze the user's specific vibe/theme requests and pick the top {n_recommendations} absolute best matches.
72
+ 2. **THE SEQUEL RULE**: You MUST NOT recommend direct sequels, prequels, or movies of the exact anime the user mentioned, UNLESS they explicitly ask for a watch order or sequel. (e.g., If they ask for "shows like Bleach", do NOT recommend "Bleach: Thousand-Year Blood War").
73
+ 3. **Conversational Tone**: Speak like a relaxed anime fan chatting with a friend. DO NOT use robotic transitions like "Based on the provided context..." or "Here are your recommendations...". Just jump right into the good stuff.
74
+ 4. **The Pitch**: For each pick, write 1-2 sentences explaining exactly *why* it fits their specific request based on the synopsis and genres.
75
+ 5. **No Hallucinations**: If an anime lacks a synopsis, do not invent one. Explain it using its genres and your general knowledge of its themes.
76
+ 6. **Honesty**: If the retrieved context doesn't have any genuinely good matches for the user's query, be honest. Tell them the database didn't have a perfect fit and suggest what kind of show they should look for instead.
 
 
 
 
 
 
 
 
 
 
77
 
78
+ # Required Format:
79
+ **[Anime Title]**
80
+ [Your 1-2 sentence pitch on why it fits the user's exact vibe]
81
 
82
+ **[Anime Title]**
83
+ [Your pitch...]
84
 
85
+ (Limit to {n_recommendations} recommendations maximum.)
86
  """
87
 
88
  return prompt
src/llm/tool_use_schema.json DELETED
@@ -1,35 +0,0 @@
1
- {
2
- "tools": [
3
- {
4
- "type": "function",
5
- "function": {
6
- "name": "get_weather",
7
- "description": "Get current weather for a location",
8
- "parameters": {
9
- "type": "object",
10
- "properties": {
11
- "location": {
12
- "type": "string",
13
- "description": "City and state, e.g. San Francisco, CA"
14
- },
15
- "unit": {
16
- "type": "string",
17
- "enum": ["celsius", "fahrenheit"]
18
- }
19
- },
20
- "required": ["location"]
21
- }
22
- }
23
- }
24
- ],
25
- "messages": [
26
- {
27
- "role": "system",
28
- "content": "You are a weather assistant. Respond to the user question and use tools if needed to answer the query."
29
- },
30
- {
31
- "role": "user",
32
- "content": "What's the weather in San Francisco?"
33
- }
34
- ],
35
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/retrieval/rag_pipeline.py CHANGED
@@ -1,6 +1,7 @@
 
1
  from src.retrieval.vector_search import AnimeRetriever
2
  from src.llm.groq_client import GroqLLM
3
- from src.llm.prompts import create_recommendation_prompt, create_system_prompt
4
  import logging
5
  from config import settings
6
 
@@ -54,30 +55,66 @@ class AnimeRAGPipeline:
54
  - retrieved_anime: raw retrieval results(for debugging)
55
  """
56
  logger.info(f"\n----Processing query: {user_query}-----\n")
57
-
58
- logger.info(f"\n[1/3] Retrieving from vector database...")
59
  filters = filters or {}
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  retrieved_animes = self.retriever.search(
62
- query=user_query,
63
  n_results=self.retriever_k,
64
  **filters
65
  )
66
 
67
- logger.info(f"\n[2/3] Creating prompt with retrieved content...")
 
68
  prompt = create_recommendation_prompt(
69
  user_query=user_query,
70
  retrieved_animes=retrieved_animes,
71
  n_recommendations=self.recommendation_n
72
  )
73
 
74
- logger.info(f"\n[3/3] LLM Reasoning about recommendations...")
75
  system_prompt = create_system_prompt()
76
 
77
  recommendations = self.llm.generate(
78
  prompt=prompt,
79
  system_prompt=system_prompt,
80
- temperature=0.4,
81
  max_tokens=1500
82
  )
83
 
 
1
+ import json
2
  from src.retrieval.vector_search import AnimeRetriever
3
  from src.llm.groq_client import GroqLLM
4
+ from src.llm.prompts import create_recommendation_prompt, create_system_prompt, ANIME_SEARCH_TOOL, ROUTER_SYSTEM_PROMPT
5
  import logging
6
  from config import settings
7
 
 
55
  - retrieved_anime: raw retrieval results(for debugging)
56
  """
57
  logger.info(f"\n----Processing query: {user_query}-----\n")
 
 
58
  filters = filters or {}
59
 
60
+ # [STEP 1] The Agentic Decision Call
61
+ logger.info("[1/4] Asking LLM if it needs to search...")
62
+
63
+ initial_response = self.llm.chat_with_tools(
64
+ messages=[{"role": "user", "content": user_query}],
65
+ tools=ANIME_SEARCH_TOOL,
66
+ system_prompt=ROUTER_SYSTEM_PROMPT
67
+ )
68
+
69
+ # [STEP 2] Check if the LLM decided to call the tool
70
+ if not initial_response:
71
+ logger.error("Groq API failed completely.")
72
+ return {
73
+ "query": user_query,
74
+ "recommendations": "Sorry, I'm having trouble processing your query. Can you be more clear and try again?",
75
+ "retrieved_count": 0,
76
+ "retrieved_animes": []
77
+ }
78
+ if not initial_response.tool_calls:
79
+ logger.info(
80
+ "[2/4] No search needed. Returning conversational response.")
81
+ return {
82
+ "query": user_query,
83
+ "retrieved_count": 0,
84
+ "recommendations": initial_response.content,
85
+ "retrieved_animes": []
86
+ }
87
+
88
+ # [STEP 3] The LLM wants to search. Extract its optimized parameters.
89
+ logger.info("[2/4] Tool called! Executing vector search...")
90
+ tool_call = initial_response.tool_calls[0]
91
+ tool_args = json.loads(tool_call.function.arguments)
92
+
93
+ logger.info(f"Tool called: [{tool_call}] with args: [{tool_args}]\n")
94
+
95
+ optimized_query = tool_args.get("optimized_query", user_query)
96
+
97
  retrieved_animes = self.retriever.search(
98
+ query=optimized_query,
99
  n_results=self.retriever_k,
100
  **filters
101
  )
102
 
103
+ # [STEP 4] The Final Recommendation Call
104
+ logger.info("[3/4] Creating prompt with retrieved content...")
105
  prompt = create_recommendation_prompt(
106
  user_query=user_query,
107
  retrieved_animes=retrieved_animes,
108
  n_recommendations=self.recommendation_n
109
  )
110
 
111
+ logger.info("[4/4] LLM generating final response...")
112
  system_prompt = create_system_prompt()
113
 
114
  recommendations = self.llm.generate(
115
  prompt=prompt,
116
  system_prompt=system_prompt,
117
+ temperature=0.5,
118
  max_tokens=1500
119
  )
120