# Source: Hugging Face Space "avneesh123/Hybrid-GraphRAG-Agent" (status: Sleeping).
# File size: 9,756 bytes.


import os
import time
import traceback
from dotenv import load_dotenv
from langchain_core.messages import convert_to_messages
from langchain_core.tools import tool
from langchain_groq import ChatGroq
from langchain_tavily import TavilySearch
from langgraph.prebuilt import create_react_agent
from langgraph_supervisor import create_supervisor
from langchain_google_genai import ChatGoogleGenerativeAI
from langsmith import traceable
from e2b_code_interpreter import Sandbox
from langchain_chroma import Chroma
from langchain_community.graphs import Neo4jGraph
from langchain_cohere import ChatCohere, CohereEmbeddings
from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_mistralai import ChatMistralAI

# --- Agent application factory (entry point: get_agent_app) and its helpers ---

# Prompt used by GraphCypherQAChain to turn a question into a Cypher query.
# Only {schema} and {question} are template variables. Any other single-brace
# token would become a required input that the chain never supplies, so the
# name placeholder in rule 3 is written with angle brackets instead of braces.
_CYPHER_TEMPLATE = """
        You are a Neo4j Expert.
    Schema: {schema}
    Goal: Generate a Cypher query to retrieve ALL connections (Skills, Projects, Education) for the user.
    
    ⚠️ CRITICAL RULES:
    1. **Case-Insensitive:** ALWAYS use `WHERE toLower(p.name) CONTAINS toLower('avneesh')`.
    2. **Relationship Syntax:** When matching multiple relationships, DO NOT repeat the colon.
       - ❌ WRONG: `[:HAS_SKILL|:WORKED_ON|:HAS_DEGREE]`
       - ✅ CORRECT: `[:HAS_SKILL|WORKED_ON|HAS_DEGREE]`
    3. **Pattern:** `MATCH (p:Person)-[r]->(related) WHERE toLower(p.name) CONTAINS toLower('<keyword from the question>') RETURN p, r, related LIMIT 50`
    
    Question: {question}
    Cypher Query:
       """

# System prompt for the agent that owns `graph_tool`.
_GRAPH_AGENT_PROMPT = """You are a Knowledge Graph expert. Use `graph_tool` to query the Knowledge Graph.

    **Instructions:**
    1. Analyze the user's question and extract key entities (Names, Technologies).
    2. Use the tool to query the database.
    3. **If Data Found:** Return the exact facts found (e.g., "Avneesh knows Python, Java").
    4. **If No Data/Empty:** Return exactly: "GRAPH_FAILURE: No relevant data found in the knowledge graph."
    5. pass answer to supervisor but do not call any tool  respond with the exact message: "graph_info passed successfully to supervisor."""

# System prompt for the agent that owns `vector_tool`.
_VECTOR_AGENT_PROMPT = """
            "You are a vector expert. Use `vector_tool` to find document context."
            "When you get the information, just output the answer directly as text."
            "pass answer to supervisor but do not call any tool  respond with the exact message: "vector passed successfully to supervisor."""

# System prompt for the agent that owns the Tavily web-search tool. The tool
# is referred to by the name it is registered under so the model does not try
# to call a tool that was never bound to it.
_WEB_AGENT_PROMPT = """
            "You are a web expert. Use `tavily_search` to find real-time context."
            "When you get the information, just output the answer directly as text."
            "pass answer to supervisor but do not call any tool  respond with the exact message: "web_info passed successfully to supervisor."""

# System prompt for the supervisor. Agent names match the `name=` values the
# worker agents are created with (graph_agent, vector_agent, web_agent).
_SUPERVISOR_PROMPT = """
   You are a Senior Technical Project Manager (Supervisor).
    Your goal is to answer user queries by orchestrating a team of agents in a SPECIFIC ORDER.
    
    ⚠️ **INTELLIGENT WORKFLOW (FOLLOW STRICTLY):**
    
    **STEP 1: IDENTIFY (graph_agent)**
    - ALWAYS call `graph_agent` FIRST.
    - Ask it to find the *relationships* and *facts* about the entities in the user's query.
    - *Goal:* Figure out "What is this?" (e.g., Is 'AutoDev' a person, a project, or a skill?).
    
    **STEP 2: STRATEGIZE (Internal Thought)**
    - Analyze the graph_agent's output.
    - IF Graph says "X is a Project" -> You know you need architecture/code details.
    - IF Graph says "X is a Person" -> You might need their resume/bio.
    
    **STEP 3: DEEP DIVE (vector_agent)**
    - Based on Step 2, formulate a *Specific Query* for the `vector_agent`.
    - ❌ DO NOT just pass the user's raw question.
    - ✅ PASS A TARGETED QUERY. 
      - *Bad:* "Tell me about AutoDev."
      - *Good:* "Search documents for 'AutoDev' architecture diagrams, tech stack, and deployment steps."
    
    **STEP 4: REFINE (web_agent)**
    - No code-execution agent is available: if you need to calculate something, do it yourself with the data gathered from Step 1 & 3.
    - Use `web_agent` ONLY if the data is missing from both Graph and Docs (e.g., live stock prices).
    
    **FINAL OUTPUT:**
    - Synthesize all info into a clear, professional answer.
    """


def _build_vector_tool(vector_store):
    """Return a tool that runs a similarity search against *vector_store*."""

    @tool
    def vector_tool(query: str) -> str:
        """
        Use this tool to find Context, from internal documents.
        Input: A search query (e.g., 'AutoDev project architecture').
        """
        print(f"   ⚡ [Vector Tool]: Searching Docs for '{query}'...")
        try:
            results = vector_store.similarity_search(query, k=4)
            if not results:
                return "VECTOR_FAILURE: No info found in docs."
            return "\n\n".join(doc.page_content for doc in results)
        except Exception as e:
            # Errors are reported as text so the calling agent can react to them.
            return f"Vector Error: {e}"

    return vector_tool


def _build_graph_tool(google_api_key):
    """Return a tool that answers questions from the Neo4j knowledge graph.

    If the graph connection or the Cypher QA chain cannot be set up, a
    fallback tool with the same name is returned that always answers
    "Graph Connection Failed", so the rest of the application still starts.
    """
    try:
        # Called without arguments, so the connection settings are expected to
        # come from the NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD env vars.
        graph = Neo4jGraph()

        cypher_prompt = PromptTemplate(
            template=_CYPHER_TEMPLATE,
            input_variables=["schema", "question"],
        )

        # Gemini 2.5 Pro generates the Cypher (chosen to reduce syntax errors).
        # NOTE(review): allow_dangerous_requests=True lets model-generated
        # Cypher run against the database -- confirm the Neo4j credentials
        # are scoped narrowly (ideally read-only).
        cypher_chain = GraphCypherQAChain.from_llm(
            llm=ChatGoogleGenerativeAI(model="gemini-2.5-pro", api_key=google_api_key),
            graph=graph,
            verbose=True,
            cypher_prompt=cypher_prompt,
            allow_dangerous_requests=True,
        )
    except Exception as exc:
        # Best-effort: keep the app usable without the graph, but say why.
        print(f"   ⚠️ [Graph Tool]: Setup failed, using fallback: {exc}")

        @tool
        def graph_tool(query: str) -> str:
            """
            Use this tool to find Skills, Relationships, Connections, or Tech Stacks.
            """
            return "Graph Connection Failed"

        return graph_tool

    @tool
    def graph_tool(query: str) -> str:
        """
        Use this tool to find Skills, Relationships, Connections, or Tech Stacks.
        """
        print(f"   🔗 [Graph Tool]: Reasoning for '{query}'...")
        try:
            result = cypher_chain.invoke(query)
            data = result['result']

            # An empty graph answer is turned into an explicit signal so the
            # supervisor can fall back to the document search.
            if not data or "I don't know" in data or data == "[]":
                return "GRAPH_FAILURE: No direct match. SUGGESTION: Ask vector_agent."

            return str(data)
        except Exception as e:
            return f"Graph Error: {e}"

    return graph_tool


def get_agent_app():
    """Build and compile the supervisor-led multi-agent application.

    Loads the environment (including a local ``.env`` file), creates the web
    search, vector search and knowledge-graph tools, wraps each one in a ReAct
    agent, and wires the agents under a supervisor.

    Returns:
        The compiled supervisor graph returned by ``create_supervisor(...).compile()``.

    Raises:
        KeyError: if ``COHERE_API_KEY`` is not set in the environment.
    """
    load_dotenv()

    # Keys are read inside the function so the values present at call time
    # (after load_dotenv) are the ones used.
    tavily_api_key = os.environ.get("TAVILY_API_KEY")
    groq_api_key = os.environ.get("GROQ_API_KEY")
    google_api_key = os.getenv("GOOGLE_API_KEY")
    # Required for the embeddings below; fail fast with KeyError if missing.
    cohere_api_key = os.environ["COHERE_API_KEY"]

    # --- Tools ---
    web_search = TavilySearch(api_key=tavily_api_key, max_results=3)

    embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)

    # Relative path so the persisted store is also found on the hosting platform.
    vector_store = Chroma(
        persist_directory="./chroma_db_groq_v1",
        embedding_function=embeddings,
        collection_name="rag_groq_final",
    )

    vector_tool = _build_vector_tool(vector_store)
    graph_tool = _build_graph_tool(google_api_key)

    # --- Models ---
    @traceable
    def create_groq_supervisor_model():
        return ChatGroq(model="qwen/qwen3-32b", api_key=groq_api_key)  # Powerful model for reasoning

    @traceable
    def create_groq_coding_model():
        return ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)  # Powerful model for coding

    @traceable
    def create_google_coding2_model():
        return ChatGoogleGenerativeAI(model="gemini-2.5-flash", api_key=google_api_key)  # Powerful model for coding

    @traceable
    def create_groq_coding3_model():
        return ChatGroq(model="openai/gpt-oss-20b", api_key=groq_api_key)  # Powerful model for coding

    sup_model = create_groq_supervisor_model()
    coding_model = create_groq_coding_model()
    coding_model2 = create_google_coding2_model()
    coding_model3 = create_groq_coding3_model()

    # --- Worker agents (one tool each) ---
    graph_agent = create_react_agent(
        model=coding_model3,
        tools=[graph_tool],
        prompt=_GRAPH_AGENT_PROMPT,
        name="graph_agent",
    )

    vector_agent = create_react_agent(
        model=coding_model,
        tools=[vector_tool],
        prompt=_VECTOR_AGENT_PROMPT,
        name="vector_agent",
    )

    web_agent = create_react_agent(
        model=sup_model,
        tools=[web_search],
        prompt=_WEB_AGENT_PROMPT,
        name="web_agent",
    )

    # --- Supervisor ---
    supervisor = create_supervisor(
        model=coding_model2,
        agents=[vector_agent, graph_agent, web_agent],
        prompt=_SUPERVISOR_PROMPT,
        sanitize_names=True,
        add_handoff_back_messages=True,
        output_mode="full_history",
    ).compile()

    return supervisor