Spaces:

Pushkar02-n
/

AnimeRAGSystem

Sleeping

App Files Files Community

Pushkar02-n committed on Mar 1

Commit

4564881

1 Parent(s): f4d0448

Make Chatbot Agentic

Browse files

Files changed (6) hide show

src/api/main.py +28 -18
src/data_ingestion/saving_data_to_postgres.py +0 -0
src/llm/groq_client.py +33 -0
src/llm/prompts.py +48 -34
src/llm/tool_use_schema.json +0 -35
src/retrieval/rag_pipeline.py +44 -7

src/api/main.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from fastapi import FastAPI, HTTPException, status
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
 import uvicorn
@@ -6,12 +7,30 @@ from src.api.schemas import RecommendationRequest, RecommendationResponse
 import time
 from config import settings
 import traceback
-from src.retrieval.rag_pipeline import AnimeRAGPipeline
 app = FastAPI(title="Anime Recommendation API",
               description="RAG-powered anime recommendation system",
-              version="1.0.0")
 app.add_middleware(
@@ -22,16 +41,6 @@ app.add_middleware(
     allow_headers=["*"]
 )
-pipeline = None
-def get_pipeline():
-    """Lazy initialization of pipeline"""
-    global pipeline
-    if pipeline is None:
-        pipeline = AnimeRAGPipeline(retriever_k=10)
-    return pipeline
 @app.get("/")
 async def root():
@@ -44,7 +53,7 @@ async def root():
 @app.post("/recommend", response_model=RecommendationResponse)
-async def get_recommendations(request: RecommendationRequest):
     """
     Get anime recommendation based on user query
@@ -58,7 +67,7 @@ async def get_recommendations(request: RecommendationRequest):
     ```
     """
     try:
-        rag_pipeline = get_pipeline()
         rag_pipeline.recommendation_n = request.n_results
@@ -76,7 +85,8 @@ async def get_recommendations(request: RecommendationRequest):
         )
         end_time = time.time()
-        print(f"Retrieved anime : \n{result["retrieved_count"]}")
         print(f"Result Recommendations: \n{result["recommendations"][:20]}")
         return RecommendationResponse(
             query=result["query"],
@@ -96,9 +106,9 @@ async def get_recommendations(request: RecommendationRequest):
 @app.get("/stats")
-async def get_stats():
     """Get system statistics"""
-    rag_pipeline = get_pipeline()
     return {
         "total_anime": rag_pipeline.retriever.collection.count(),

+from src.retrieval.rag_pipeline import AnimeRAGPipeline
+from fastapi import FastAPI, HTTPException, status, Request
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel, Field
 import uvicorn
 import time
 from config import settings
 import traceback
+import logging
+from contextlib import asynccontextmanager
+logger = logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """
+    Handles startup and shutdown of the Anime RAG Pipeline.
+    Replaces lazy global initialization with app state.
+    """
+    print("Initializing Anime RAG Pipeline...")
+    app.state.pipeline = AnimeRAGPipeline(retriever_k=10)
+    yield
+    print("Shutting down... Cleaning up resources.")
 app = FastAPI(title="Anime Recommendation API",
               description="RAG-powered anime recommendation system",
+              version="1.0.0",
+              lifespan=lifespan)
 app.add_middleware(
     allow_headers=["*"]
 )
 @app.get("/")
 async def root():
 @app.post("/recommend", response_model=RecommendationResponse)
+async def get_recommendations(request: RecommendationRequest, fastapi_req: Request):
     """
     Get anime recommendation based on user query
     ```
     """
     try:
+        rag_pipeline = fastapi_req.app.state.pipeline
         rag_pipeline.recommendation_n = request.n_results
         )
         end_time = time.time()
+        # print(f"Retrieved anime : \n{result["retrieved_animes"][0]}")
+        print(f"Retrieved anime Count : \n{result["retrieved_count"]}")
         print(f"Result Recommendations: \n{result["recommendations"][:20]}")
         return RecommendationResponse(
             query=result["query"],
 @app.get("/stats")
+async def get_stats(fastapi_req: Request):
     """Get system statistics"""
+    rag_pipeline = fastapi_req.app.state.pipeline
     return {
         "total_anime": rag_pipeline.retriever.collection.count(),

src/data_ingestion/saving_data_to_postgres.py ADDED Viewed

File without changes

src/llm/groq_client.py CHANGED Viewed

@@ -56,6 +56,39 @@ class GroqLLM:
             logger.error(f"Groq API error: {e}")
             return "Sorry, I encountered an error generating the response"
 if __name__ == '__main__':
     llm = GroqLLM()

             logger.error(f"Groq API error: {e}")
             return "Sorry, I encountered an error generating the response"
+    def chat_with_tools(
+        self,
+        messages: list,
+        tools: list,
+        system_prompt: str = "You are a helpful anime recommendation assistant.",
+        temperature: float = 0  # Keep temperature very low for more reliable tool calling
+    ):
+        """
+        Agentic generation that supports function/tool calling.
+        Returns the full message object so we can inspect for tool_calls.
+        """
+        try:
+            full_messages = [
+                {"role": "system", "content": system_prompt}] + messages
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=full_messages,
+                tools=tools,
+                tool_choice="auto",  # This tells the LLM it can choose to use a tool or just chat
+                temperature=temperature,
+                max_tokens=1024
+            )
+            # Note: We return the actual message object, not just the content string.
+            # This allows the RAG pipeline to check `if message.tool_calls:`
+            return response.choices[0].message
+        except Exception as e:
+            logger.error(f"Groq API Tool Calling error: {e}")
+            return None
 if __name__ == '__main__':
     llm = GroqLLM()

src/llm/prompts.py CHANGED Viewed

@@ -2,6 +2,38 @@
 Prompt Templates for anime recommendation system
 """
 def create_recommendation_prompt(
         user_query: str,
@@ -28,47 +60,29 @@ def create_recommendation_prompt(
     context = "\n".join(context_parts)
     prompt = f"""
-You are an expert anime recommendation assistant. A user has asked for recommendations, and you are provided in context relevant similar anime from the database.
-First: If the user is asking for a comparison or opinion on specific anime,
-provide thoughtful comparison rather than a list of recommendations.
-## 1. If recommendations, only then:
-Your task is to:
-1. Analyze the user's request carefully, paying attention to specific preferences (tone, themes, etc.)
-2. Evaluate each retrieved anime against their user's criteria
-3. Select the {n_recommendations} BEST matches that truly fit what they're asking for
-4. Explain why each recommendation fits their request. Answer in a way, that sounds really casual, super relaxed, and don't explain your "system thinking" such as "User has asked for ...., So here they are..." or any such variations.
-User's Query:
-"{user_query}"
-Retrieved anime from semantic search:
 {context}
-Instructions:
-- If the user mentioned specific preferences (e.g., "lighter", "darker", "more action"), prioritize those
-- Don't just list all retrieved anime - SELECT the best {n_recommendations} that truly match
-- For each recommendation, explain in 1-2 sentences WHY it matches their request
-- If some retrieved anime DON'T match the user's specific criteria, exclude them
-- If some retrieved anime are the sequel (can be identified by name or other information) do not mention them, UNLESS user specifically asks.
-    -> If user SPEICIFICALLY Ask about some sequel, and there's no synopsis, tell the User something similar to Synopsis of prequel(that might be in context) and additionally answer based on other featurs like genre, demographics, etc. DO NOT INVENT SYNOPSIS !!!
-- Be honest if none of the retrieved anime are great matches
-Format your response as:
-**Recommendation 1: [Anime Title]**
-[1-2 sentence explanation of why it matches]
-**Recommendation 2: [Anime Title]**
-[1-2 sentence explanation]
-[Continue for {n_recommendations} recommendations]
-If you think the retrieved anime don't match the request well, say so and explain what type of anime would be better.
-## 2. If the user wants to talk casually about some anime they recently watched, engage into meaningful conversation with them, and ONLY when it seems user wants recommendations, give recommendation like mentioned in ##1 above.
-## 3. FOR ANY OTHER Gibberish queries, not matching to the "Anime world" except some occasional GREETINGS(in which case you have to greet properly), just give a generic message that "You are not capable of answering that, politely".
-**Response Style**: Friendly, Casual, and do not mention the programmer(or anything similar to what/how you process/look for data). Just a Intelligent Bot answering to a user.
 """
     return prompt

 Prompt Templates for anime recommendation system
 """
+ANIME_SEARCH_TOOL = [
+    {
+        "type": "function",
+        "function": {
+            "name": "search_anime_database",
+            "description": "Use this tool to search the vector database for anime recommendations based on user requests. Call this whenever the user asks for recommendations, similar shows, or specific genres.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "optimized_query": {
+                        "type": "string",
+                        "description": "The user's query optimized for semantic vector search. Remove conversational filler. (e.g., if user says 'hi tell me an anime like naruto', the optimized_query is 'anime similar to naruto action shounen ninja')"
+                    }
+                },
+                "required": ["optimized_query"]
+            }
+        }
+    }
+]
+ROUTER_SYSTEM_PROMPT = """
+You are a strict Gatekeeper for an Anime Recommendation System. Your ONLY job is to categorize user input.
+# RULES:
+1. GREETINGS: If the user says "Hi", "Hello", etc., respond naturally in text. DO NOT CALL THE TOOL.
+2. ANIME SEARCH: Only call 'search_anime_database' if the user provides SPECIFIC criteria (e.g., "dark anime", "shows like Naruto", "romance with high score").
+3. VAGUE/GIBBERISH: If the user says "anime", "similar", "recommend something", or types gibberish/random characters, DO NOT CALL THE TOOL. Instead, respond in text asking for specific details or preferences.
+4. VULGARITY/OFF-TOPIC: If the user is being vulgar or asking about non-anime topics (politics, math, etc.), respond politely stating you only talk about anime. DO NOT CALL THE TOOL.
+# THRESHOLD:
+If you are less than 90% sure what the user wants, DO NOT call the tool. Ask for clarification instead.
+"""
 def create_recommendation_prompt(
         user_query: str,
     context = "\n".join(context_parts)
     prompt = f"""
+You are an expert, polite, casual and friendly anime recommender. Your goal is to give personalized anime recommendations based strictly on the provided database context.
+User's Query: "{user_query}"
+Available Anime Context (from vector search):
 {context}
+# Core Directives:
+1. **Curate, Don't List**: Do not just repeat the context. Analyze the user's specific vibe/theme requests and pick the top {n_recommendations} absolute best matches.
+2. **THE SEQUEL RULE**: You MUST NOT recommend direct sequels, prequels, or movies of the exact anime the user mentioned, UNLESS they explicitly ask for a watch order or sequel. (e.g., If they ask for "shows like Bleach", do NOT recommend "Bleach: Thousand-Year Blood War").
+3. **Conversational Tone**: Speak like a relaxed anime fan chatting with a friend. DO NOT use robotic transitions like "Based on the provided context..." or "Here are your recommendations...". Just jump right into the good stuff.
+4. **The Pitch**: For each pick, write 1-2 sentences explaining exactly *why* it fits their specific request based on the synopsis and genres.
+5. **No Hallucinations**: If an anime lacks a synopsis, do not invent one. Explain it using its genres and your general knowledge of its themes.
+6. **Honesty**: If the retrieved context doesn't have any genuinely good matches for the user's query, be honest. Tell them the database didn't have a perfect fit and suggest what kind of show they should look for instead.
+# Required Format:
+**[Anime Title]**
+[Your 1-2 sentence pitch on why it fits the user's exact vibe]
+**[Anime Title]**
+[Your pitch...]
+(Limit to {n_recommendations} recommendations maximum.)
 """
     return prompt

src/llm/tool_use_schema.json DELETED Viewed

@@ -1,35 +0,0 @@
-{
-  "tools": [
-    {
-      "type": "function",
-      "function": {
-        "name": "get_weather",
-        "description": "Get current weather for a location",
-        "parameters": {
-          "type": "object",
-          "properties": {
-            "location": {
-              "type": "string",
-              "description": "City and state, e.g. San Francisco, CA"
-            },
-            "unit": {
-              "type": "string",
-              "enum": ["celsius", "fahrenheit"]
-            }
-          },
-          "required": ["location"]
-        }
-      }
-    }
-  ],
-  "messages": [
-    {
-      "role": "system",
-      "content": "You are a weather assistant. Respond to the user question and use tools if needed to answer the query."
-    },
-    {
-      "role": "user",
-      "content": "What's the weather in San Francisco?"
-    }
-  ],
-}

src/retrieval/rag_pipeline.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from src.retrieval.vector_search import AnimeRetriever
 from src.llm.groq_client import GroqLLM
-from src.llm.prompts import create_recommendation_prompt, create_system_prompt
 import logging
 from config import settings
@@ -54,30 +55,66 @@ class AnimeRAGPipeline:
             - retrieved_anime: raw retrieval results(for debugging)
         """
         logger.info(f"\n----Processing query: {user_query}-----\n")
-        logger.info(f"\n[1/3] Retrieving from vector database...")
         filters = filters or {}
         retrieved_animes = self.retriever.search(
-            query=user_query,
             n_results=self.retriever_k,
             **filters
         )
-        logger.info(f"\n[2/3] Creating prompt with retrieved content...")
         prompt = create_recommendation_prompt(
             user_query=user_query,
             retrieved_animes=retrieved_animes,
             n_recommendations=self.recommendation_n
         )
-        logger.info(f"\n[3/3] LLM Reasoning about recommendations...")
         system_prompt = create_system_prompt()
         recommendations = self.llm.generate(
             prompt=prompt,
             system_prompt=system_prompt,
-            temperature=0.4,
             max_tokens=1500
         )

+import json
 from src.retrieval.vector_search import AnimeRetriever
 from src.llm.groq_client import GroqLLM
+from src.llm.prompts import create_recommendation_prompt, create_system_prompt, ANIME_SEARCH_TOOL, ROUTER_SYSTEM_PROMPT
 import logging
 from config import settings
             - retrieved_anime: raw retrieval results(for debugging)
         """
         logger.info(f"\n----Processing query: {user_query}-----\n")
         filters = filters or {}
+        # [STEP 1] The Agentic Decision Call
+        logger.info("[1/4] Asking LLM if it needs to search...")
+        initial_response = self.llm.chat_with_tools(
+            messages=[{"role": "user", "content": user_query}],
+            tools=ANIME_SEARCH_TOOL,
+            system_prompt=ROUTER_SYSTEM_PROMPT
+        )
+        # [STEP 2] Check if the LLM decided to call the tool
+        if not initial_response:
+            logger.error("Groq API failed completely.")
+            return {
+                "query": user_query,
+                "recommendations": "Sorry, I'm having trouble processing your query. Can you be more clear and try again?",
+                "retrieved_count": 0,
+                "retrieved_animes": []
+            }
+        if not initial_response.tool_calls:
+            logger.info(
+                "[2/4] No search needed. Returning conversational response.")
+            return {
+                "query": user_query,
+                "retrieved_count": 0,
+                "recommendations": initial_response.content,
+                "retrieved_animes": []
+            }
+        # [STEP 3] The LLM wants to search. Extract its optimized parameters.
+        logger.info("[2/4] Tool called! Executing vector search...")
+        tool_call = initial_response.tool_calls[0]
+        tool_args = json.loads(tool_call.function.arguments)
+        logger.info(f"Tool called: [{tool_call}] with args: [{tool_args}]\n")
+        optimized_query = tool_args.get("optimized_query", user_query)
         retrieved_animes = self.retriever.search(
+            query=optimized_query,
             n_results=self.retriever_k,
             **filters
         )
+        # [STEP 4] The Final Recommendation Call
+        logger.info("[3/4] Creating prompt with retrieved content...")
         prompt = create_recommendation_prompt(
             user_query=user_query,
             retrieved_animes=retrieved_animes,
             n_recommendations=self.recommendation_n
         )
+        logger.info("[4/4] LLM generating final response...")
         system_prompt = create_system_prompt()
         recommendations = self.llm.generate(
             prompt=prompt,
             system_prompt=system_prompt,
+            temperature=0.5,
             max_tokens=1500
         )