Spaces:

avneesh123
/

Hybrid-GraphRAG-Agent

Sleeping

App Files Files Community

Hybrid-GraphRAG-Agent / src /agent.py

avneesh123

Update src/agent.py

e85789d verified 6 months ago

raw

history blame contribute delete

9.76 kB


	import os
	import time
	import traceback
	from dotenv import load_dotenv
	from langchain_core.messages import convert_to_messages
	from langchain_core.tools import tool
	from langchain_groq import ChatGroq
	from langchain_tavily import TavilySearch
	from langgraph.prebuilt import create_react_agent
	from langgraph_supervisor import create_supervisor
	from langchain_google_genai import ChatGoogleGenerativeAI
	from langsmith import traceable
	from e2b_code_interpreter import Sandbox
	from langchain_chroma import Chroma
	from langchain_community.graphs import Neo4jGraph
	from langchain_cohere import ChatCohere, CohereEmbeddings
	from langchain_community.chains.graph_qa.cypher import GraphCypherQAChain
	from langchain_core.prompts import PromptTemplate
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from langchain_experimental.graph_transformers import LLMGraphTransformer
	from langchain_mistralai import ChatMistralAI

	# --- WRAPPER FUNCTION START ---
	def get_agent_app():
	    """Build and compile the multi-agent supervisor application.

	    Reads API keys from the process environment (after ``load_dotenv()``),
	    creates the vector, graph and web-search tools, wraps each one in a ReAct
	    agent, and returns the compiled supervisor that routes between them.

	    Returns:
	        The object returned by ``create_supervisor(...).compile()``.

	    Raises:
	        KeyError: If ``COHERE_API_KEY`` is not set; the embeddings client
	            cannot be created without it.
	    """
	    load_dotenv()

	    # --- Environment keys (read inside the function to capture runtime env) ---
	    tavily_api_key = os.environ.get("TAVILY_API_KEY")
	    groq_api_key = os.environ.get("GROQ_API_KEY")
	    google_api_key = os.environ.get("GOOGLE_API_KEY")
	    mistralai_api_key = os.environ.get("MISTRALAI_API_KEY")
	    # Required key: fail fast with KeyError instead of passing None downstream.
	    cohere_api_key = os.environ["COHERE_API_KEY"]

	    # --- Graph keys ---
	    # Neo4jGraph() reads its connection settings from these variables itself.
	    # Assigning os.environ[k] = os.environ.get(k) is a no-op when the variable
	    # is set and a TypeError when it is not, so only report what is missing and
	    # let the guarded graph setup below fall back gracefully.
	    missing_neo4j = [
	        name
	        for name in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD")
	        if not os.environ.get(name)
	    ]
	    if missing_neo4j:
	        print(f" ⚠️ [Graph Setup]: Missing environment variables: {', '.join(missing_neo4j)}")

	    # --- Tools setup ---
	    web_search = TavilySearch(api_key=tavily_api_key, max_results=3)

	    embeddings = CohereEmbeddings(model="embed-english-v3.0", cohere_api_key=cohere_api_key)

	    # Relative path so the persisted store resolves on the hosting platform.
	    vector_store = Chroma(
	        persist_directory="./chroma_db_groq_v1",
	        embedding_function=embeddings,
	        collection_name="rag_groq_final",
	    )

	    @tool
	    def vector_tool(query: str) -> str:
	        """
	        Use this tool to find Context, from internal documents.
	        Input: A search query (e.g., 'AutoDev project architecture').
	        """
	        print(f" ⚡ [Vector Tool]: Searching Docs for '{query}'...")
	        try:
	            results = vector_store.similarity_search(query, k=4)
	            if not results:
	                return "VECTOR_FAILURE: No info found in docs."
	            return "\n\n".join(doc.page_content for doc in results)
	        except Exception as e:
	            # Tool boundary: report the failure to the agent instead of raising.
	            return f"Vector Error: {e}"

	    # --- Graph setup and Cypher prompt ---
	    try:
	        graph = Neo4jGraph()

	        # `{schema}` and `{question}` are filled in by the chain. The keyword
	        # placeholder in rule 3 is escaped with double braces so it reaches the
	        # model as literal text instead of being treated as a missing variable.
	        cypher_template = """
	You are a Neo4j Expert.
	Schema: {schema}
	Goal: Generate a Cypher query to retrieve ALL connections (Skills, Projects, Education) for the user.

	⚠️ CRITICAL RULES:
	1. Case-Insensitive: ALWAYS use `WHERE toLower(p.name) CONTAINS toLower('avneesh')`.
	2. Relationship Syntax: When matching multiple relationships, DO NOT repeat the colon.
	- ❌ WRONG: `[:HAS_SKILL|:WORKED_ON|:HAS_DEGREE]`
	- ✅ CORRECT: `[:HAS_SKILL|WORKED_ON|HAS_DEGREE]`
	3. Pattern: `MATCH (p:Person)-[r]->(related) WHERE toLower(p.name) CONTAINS toLower('{{question_keyword}}') RETURN p, r, related LIMIT 50`

	Question: {question}
	Cypher Query:
	"""

	        cypher_prompt = PromptTemplate(
	            template=cypher_template,
	            input_variables=["schema", "question"],
	        )

	        # Cypher generation is delegated to Gemini 2.5 Pro.
	        cypher_chain = GraphCypherQAChain.from_llm(
	            llm=ChatGoogleGenerativeAI(model="gemini-2.5-pro", api_key=google_api_key),
	            graph=graph,
	            verbose=True,
	            cypher_prompt=cypher_prompt,
	            allow_dangerous_requests=True,
	        )

	        @tool
	        def graph_tool(query: str) -> str:
	            """
	            Use this tool to find Skills, Relationships, Connections, or Tech Stacks.
	            """
	            print(f" 🔗 [Graph Tool]: Reasoning for '{query}'...")
	            try:
	                result = cypher_chain.invoke(query)
	                data = result['result']

	                # Empty graph answer: tell the supervisor to try the vector agent.
	                if not data or "I don't know" in data or data == "[]":
	                    return "GRAPH_FAILURE: No direct match. SUGGESTION: Ask Vector_Specialist."

	                return str(data)
	            except Exception as e:
	                # Tool boundary: report the failure to the agent instead of raising.
	                return f"Graph Error: {e}"
	    except Exception:
	        # Graph setup failed (connection, credentials, schema load, ...). Log the
	        # cause and register a stub so the rest of the app can still start.
	        traceback.print_exc()

	        @tool
	        def graph_tool(query: str) -> str:
	            """
	            Use this tool to find Skills, Relationships, Connections, or Tech Stacks.
	            """
	            # `@tool` needs a docstring to build the tool description.
	            return "Graph Connection Failed"

	    # --- Models ---
	    @traceable
	    def create_groq_supervisor_model():
	        """Return the Groq-hosted Qwen3 32B chat model."""
	        return ChatGroq(model="qwen/qwen3-32b", api_key=groq_api_key)

	    @traceable
	    def create_groq_coding_model():
	        """Return the Groq-hosted Llama 3.3 70B chat model."""
	        return ChatGroq(model="llama-3.3-70b-versatile", api_key=groq_api_key)

	    @traceable
	    def create_google_coding2_model():
	        """Return the Gemini 2.5 Flash chat model."""
	        return ChatGoogleGenerativeAI(model="gemini-2.5-flash", api_key=google_api_key)

	    @traceable
	    def create_groq_coding3_model():
	        """Return the Groq-hosted gpt-oss-20b chat model."""
	        return ChatGroq(model="openai/gpt-oss-20b", api_key=groq_api_key)

	    @traceable
	    def create_mistralai_coding4_model():
	        """Return the Mistral chat model (no max-token limit is passed)."""
	        return ChatMistralAI(
	            api_key=mistralai_api_key,
	            model="ministral-3b-latest",
	            max_retries=3,
	            temperature=0.7,
	        )

	    sup_model = create_groq_supervisor_model()
	    coding_model = create_groq_coding_model()
	    coding_model2 = create_google_coding2_model()
	    coding_model3 = create_groq_coding3_model()
	    # NOTE: coding_model4 is created but not attached to any agent below.
	    coding_model4 = create_mistralai_coding4_model()

	    # --- Graph agent ---
	    graph_agent = create_react_agent(
	        model=coding_model3,
	        tools=[graph_tool],
	        prompt=(
	            """You are a graph expert. Use `graph_tool` to query the Knowledge Graph.

	Instructions:
	1. Analyze the user's question and extract key entities (Names, Technologies).
	2. Use the tool to query the database.
	3. If Data Found: Return the exact facts found (e.g., "Avneesh knows Python, Java").
	4. If No Data/Empty: Return exactly: "GRAPH_FAILURE: No relevant data found in the knowledge graph."
	5. pass answer to supervisor but do not call any tool respond with the exact message: "graph_info passed successfully to supervisor."""
	        ),
	        name="graph_agent",
	    )

	    # --- Vector agent ---
	    vector_agent = create_react_agent(
	        model=coding_model,
	        tools=[vector_tool],
	        prompt=(
	            """
	"You are a vector expert. Use `vector_tool` to find document context."
	"When you get the information, just output the answer directly as text."
	"pass answer to supervisor but do not call any tool respond with the exact message: "vector passed successfully to supervisor."""
	        ),
	        name="vector_agent",
	    )

	    # --- Web agent ---
	    # NOTE(review): the prompt refers to `web_search_tool`; confirm that this
	    # matches the name the TavilySearch tool is registered under.
	    web_agent = create_react_agent(
	        model=sup_model,
	        tools=[web_search],
	        prompt=(
	            """
	"You are a web expert. Use `web_search_tool` to find real-time context."
	"When you get the information, just output the answer directly as text."
	"pass answer to supervisor but do not call any tool respond with the exact message: "web_info passed successfully to supervisor."""
	        ),
	        name="web_agent",
	    )

	    # --- Supervisor ---
	    # NOTE(review): the prompt mentions a `Coder_Agent`, but only the vector,
	    # graph and web agents are registered here.
	    # NOTE(review): confirm `sanitize_names` is accepted by the installed
	    # langgraph_supervisor version.
	    supervisor = create_supervisor(
	        model=coding_model2,
	        agents=[vector_agent, graph_agent, web_agent],
	        prompt=(
	            """
	You are a Senior Technical Project Manager (Supervisor).
	Your goal is to answer user queries by orchestrating a team of agents in a SPECIFIC ORDER.

	⚠️ INTELLIGENT WORKFLOW (FOLLOW STRICTLY):

	STEP 1: IDENTIFY (Graph_Agent)
	- ALWAYS call `Graph_Agent` FIRST.
	- Ask it to find the relationships and facts about the entities in the user's query.
	- Goal: Figure out "What is this?" (e.g., Is 'AutoDev' a person, a project, or a skill?).

	STEP 2: STRATEGIZE (Internal Thought)
	- Analyze the Graph_Agent's output.
	- IF Graph says "X is a Project" -> You know you need architecture/code details.
	- IF Graph says "X is a Person" -> You might need their resume/bio.

	STEP 3: DEEP DIVE (Vector_Agent)
	- Based on Step 2, formulate a Specific Query for the `Vector_Agent`.
	- ❌ DO NOT just pass the user's raw question.
	- ✅ PASS A TARGETED QUERY.
	- Bad: "Tell me about AutoDev."
	- Good: "Search documents for 'AutoDev' architecture diagrams, tech stack, and deployment steps."

	STEP 4: EXECUTE/REFINE (Coder_Agent / Web_Agent)
	- If you need to calculate something or run code, use `Coder_Agent` with the data gathered from Step 1 & 3.
	- Use `Web_Agent` ONLY if the data is missing from both Graph and Docs (e.g., live stock prices).

	FINAL OUTPUT:
	- Synthesize all info into a clear, professional answer.
	"""
	        ),
	        sanitize_names=True,
	        add_handoff_back_messages=True,
	        output_mode="full_history",
	    ).compile()

	    return supervisor