AmritSbisht commited on
Commit
1e9b22a
·
verified ·
1 Parent(s): a6a3acc

Upload 35 files

Browse files
Files changed (35) hide show
  1. requirements.txt +13 -0
  2. upload/agents/__init__.py +0 -0
  3. upload/agents/__pycache__/__init__.cpython-311.pyc +0 -0
  4. upload/agents/__pycache__/rag_agent.cpython-311.pyc +0 -0
  5. upload/agents/__pycache__/router_agent.cpython-311.pyc +0 -0
  6. upload/agents/__pycache__/weather_agent.cpython-311.pyc +0 -0
  7. upload/agents/rag_agent.py +75 -0
  8. upload/agents/router_agent.py +50 -0
  9. upload/agents/weather_agent.py +82 -0
  10. upload/app.py +116 -0
  11. upload/graph/__init__.py +0 -0
  12. upload/graph/__pycache__/__init__.cpython-311.pyc +0 -0
  13. upload/graph/__pycache__/workflow.cpython-311.pyc +0 -0
  14. upload/graph/workflow.py +102 -0
  15. upload/models/__init__.py +0 -0
  16. upload/models/__pycache__/__init__.cpython-311.pyc +0 -0
  17. upload/models/__pycache__/vector_store.cpython-311.pyc +0 -0
  18. upload/models/embedding.py +27 -0
  19. upload/models/vector_store.py +72 -0
  20. upload/tests/__init__.py +0 -0
  21. upload/tests/__pycache__/__init__.cpython-311.pyc +0 -0
  22. upload/tests/__pycache__/test_api_handler.cpython-311-pytest-8.3.5.pyc +0 -0
  23. upload/tests/__pycache__/test_rag_agent.cpython-311-pytest-8.3.5.pyc +0 -0
  24. upload/tests/__pycache__/test_workflow.cpython-311-pytest-8.3.5.pyc +0 -0
  25. upload/tests/test_api_handler.py +79 -0
  26. upload/tests/test_rag_agent.py +71 -0
  27. upload/tests/test_workflow.py +78 -0
  28. upload/utils/__init__.py +0 -0
  29. upload/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  30. upload/utils/__pycache__/api_handler.cpython-311.pyc +0 -0
  31. upload/utils/__pycache__/document_loader.cpython-311.pyc +0 -0
  32. upload/utils/__pycache__/evaluation.cpython-311.pyc +0 -0
  33. upload/utils/api_handler.py +68 -0
  34. upload/utils/document_loader.py +62 -0
  35. upload/utils/evaluation.py +66 -0
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ google-genai
2
+ langchain-google-genai
3
+ langchain
4
+ langgraph
5
+ langsmith
6
+ qdrant-client
7
+ pypdf
8
+ langchain-community
9
+ requests
10
+ streamlit
11
+ tqdm
12
+ python-dotenv
13
+ pytest
upload/agents/__init__.py ADDED
File without changes
upload/agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (161 Bytes). View file
 
upload/agents/__pycache__/rag_agent.cpython-311.pyc ADDED
Binary file (4.63 kB). View file
 
upload/agents/__pycache__/router_agent.cpython-311.pyc ADDED
Binary file (3.25 kB). View file
 
upload/agents/__pycache__/weather_agent.cpython-311.pyc ADDED
Binary file (4.4 kB). View file
 
upload/agents/rag_agent.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain.prompts import ChatPromptTemplate
4
+ from langchain.schema import Document
5
+ from pydantic import BaseModel, Field
6
+ from models.vector_store import VectorStore
7
+ import os
8
+ from dotenv import load_dotenv
9
+ load_dotenv()
10
+
11
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
12
+
13
class RAGAgentState(BaseModel):
    """Pydantic state carried through the RAG agent.

    Tracks the incoming query, the documents retrieved for it, and the
    final answer text.
    """
    query: str = Field(description="The user's query for document retrieval")
    context: List[Dict[str, Any]] = Field(default=[], description="Retrieved context from documents")
    response: str = Field(default="", description="The agent's response")
18
+
19
class RAGAgent:
    """Answers questions from indexed documents via retrieval-augmented generation."""

    def __init__(self, api_key: str = GEMINI_API_KEY):
        # LLM that synthesizes the final answer from the retrieved passages.
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            google_api_key=api_key
        )

        # Qdrant-backed store holding the uploaded documents.
        self.vector_store = VectorStore()

        # System prompt constrains the model to the retrieved context only.
        self.rag_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert research assistant helping a user understand complex topics clearly and concisely.
Use only the provided context to answer the user's question. If the context does not contain the answer, say:
"I don't have enough information to answer that question."

When answering:
- Explain technical terms simply
- Use examples if helpful
- Keep the tone friendly and helpful

Context:
{context}"""),
            ("human", "{query}"),
        ])

        self.rag_chain = self.rag_prompt | self.llm

    def retrieve_context(self, query: str, k: int = 4) -> List[Document]:
        """Return the top-*k* documents most similar to *query*."""
        return self.vector_store.similarity_search(query, k=k)

    def get_rag_response(self, query: str) -> Dict[str, Any]:
        """Answer *query* from retrieved documents.

        Returns a dict with 'context' (list of page_content/metadata dicts)
        and 'response' (the generated answer text).
        """
        docs = self.retrieve_context(query)

        # No hits: short-circuit with a fixed apology rather than calling the LLM.
        if not docs:
            return {
                "context": [],
                "response": "I couldn't find any relevant information in the documents to answer your question."
            }

        combined_context = "\n\n".join(doc.page_content for doc in docs)

        answer = self.rag_chain.invoke({
            "query": query,
            "context": combined_context,
        })

        return {
            "context": [
                {"page_content": doc.page_content, "metadata": doc.metadata}
                for doc in docs
            ],
            "response": answer.content,
        }
upload/agents/router_agent.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List, Tuple
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain.prompts import ChatPromptTemplate
4
+ from pydantic import BaseModel , Field
5
+ from langgraph.graph import StateGraph
6
+ import os
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
11
+
12
class RouterState(BaseModel):
    """Pydantic state for the routing step of the pipeline."""
    # The raw user query being classified.
    query: str = Field(description="The user's query")
    # Routing decision produced by the router.
    action: str = Field(description="The action to take: 'weather' or 'document'")
16
+
17
class RouterAgent:
    """Classifies a query as a weather request or a document (RAG) request."""

    def __init__(self, api_key: str = GEMINI_API_KEY):
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            google_api_key=api_key
        )

        # The model is instructed to emit exactly one of the two labels.
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a router agent that decides whether a user query is about:
1. Weather information (requiring a weather API call)
2. Information from documents (requiring RAG retrieval)

If the query mentions weather, forecast, temperature, rain, sun, climate, or other weather-related terms for a specific location, classify it as 'weather'.

Otherwise, classify it as 'document' for document retrieval.

Return only 'weather' or 'document' as your classification."""),
            ("human", "{query}"),
        ])

        self.chain = self.prompt | self.llm

    def route_query(self, query: str) -> str:
        """Return 'weather' or 'document' for *query*.

        Falls back to 'document' whenever the model's reply does not
        mention 'weather'.
        """
        reply = self.chain.invoke({"query": query})
        verdict = reply.content.strip().lower()
        return "weather" if "weather" in verdict else "document"
upload/agents/weather_agent.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+ from langchain_google_genai import ChatGoogleGenerativeAI
3
+ from langchain.prompts import ChatPromptTemplate
4
+ from langchain.pydantic_v1 import BaseModel, Field
5
+ from utils.api_handler import WeatherAPIHandler
6
+ import os
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
11
+
12
+
13
+
14
class WeatherAgentState(BaseModel):
    """Pydantic state carried through the weather agent."""
    # Original user question.
    query: str = Field(description="The user's query about weather")
    # City resolved from the query (or supplied by the caller).
    city: str = Field(default="", description="The city to get weather for")
    # Untouched OpenWeatherMap payload.
    weather_data: Dict[str, Any] = Field(default={}, description="Raw weather data")
    # Conversational answer produced by the LLM.
    response: str = Field(default="", description="The agent's response")
20
+
21
class WeatherAgent:
    """Agent that answers weather queries using OpenWeatherMap plus Gemini.

    Pipeline: extract the city from the query with one prompt, fetch raw
    weather data from the API, then phrase the reply with a second prompt.
    """

    def __init__(self, api_key: str = GEMINI_API_KEY):
        self.llm = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            google_api_key=api_key
        )

        self.weather_api = WeatherAPIHandler()

        # Prompt that isolates the city name from a free-form query.
        self.extract_city_prompt = ChatPromptTemplate.from_messages([
            ("system", """Extract the city name from the user's weather query.
Return ONLY the city name, nothing else.
If no city is mentioned, return "Not specified"."""),
            ("human", "{query}")
        ])

        # Prompt that turns raw API data into a conversational reply.
        self.response_prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a helpful weather assistant.
Format the weather information in a friendly, conversational way.
Include all relevant weather details from the provided data."""),
            ("human", "Query: {query}\nWeather Data: {weather_info}")
        ])

        self.extract_city_chain = self.extract_city_prompt | self.llm
        self.response_chain = self.response_prompt | self.llm

    def extract_city(self, query: str) -> str:
        """Extract the city mentioned in *query*; defaults to London if absent."""
        response = self.extract_city_chain.invoke({"query": query})
        # Models occasionally wrap the answer in quotes despite the prompt;
        # strip them so the sentinel comparison below still matches.
        city = response.content.strip().strip('"\'')

        # Handle case where no city is specified
        if city.lower() == "not specified":
            return "London"  # Default city

        return city

    def get_weather_response(self, query: str, city: str | None = None) -> Dict[str, Any]:
        """Fetch weather for *city* (extracted from *query* if omitted) and phrase a reply.

        Args:
            query: The user's weather question.
            city: Optional explicit city; when falsy it is extracted from the query.
                (Annotation fixed: the default is None, so the type is str | None.)

        Returns:
            Dict with keys 'city', 'weather_data' (raw API payload, possibly an
            error dict) and 'response' (the generated answer text).
        """
        if not city:
            city = self.extract_city(query)

        # Raw API payload; on failure this is an {"error": ...} dict which
        # format_weather_data turns into a readable message.
        weather_data = self.weather_api.get_weather(city)
        weather_info = self.weather_api.format_weather_data(weather_data)

        response = self.response_chain.invoke({
            "query": query,
            "weather_info": weather_info
        })

        return {
            "city": city,
            "weather_data": weather_data,
            "response": response.content
        }
upload/app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from typing import Dict, Any, List
3
+ import tempfile
4
+ import os
5
+
6
+ from graph.workflow import LangGraphWorkflow
7
+ from utils.document_loader import DocumentLoader
8
+ from models.vector_store import VectorStore
9
+
10
+ from dotenv import load_dotenv
11
+
12
+ load_dotenv()
13
+
14
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
15
+ OPENWEATHERMAP_API_KEY = os.getenv("OPENWEATHERMAP_API_KEY")
16
+ LANGSMITH_TRACING= True
17
+ LANGSMITH_ENDPOINT= os.getenv("LANGSMITH_ENDPOINT")
18
+ LANGSMITH_API_KEY= os.getenv("LANGSMITH_API_KEY")
19
+ LANGSMITH_PROJECT= os.getenv("LANGSMITH_PROJECT")
20
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
21
+ db_url = os.getenv("db_url")
22
+ db_api = os.getenv("db_api")
23
+
24
+
25
def main():
    """Streamlit entry point: PDF upload sidebar plus a routed chat interface."""
    st.title("AI Pipeline with LangChain & LangGraph")

    # Core components (re-created on every Streamlit rerun).
    doc_loader = DocumentLoader()
    vector_store = VectorStore()
    workflow = LangGraphWorkflow()

    # --- Sidebar: document upload and indexing ---
    st.sidebar.header("Upload Documents")
    uploaded_file = st.sidebar.file_uploader("Upload a PDF document", type="pdf")

    if uploaded_file:
        with st.spinner("Processing document..."):
            pdf_path = doc_loader.save_uploaded_pdf(uploaded_file)
            if pdf_path:
                documents = doc_loader.load_pdf(pdf_path)
                if documents:
                    # Index the parsed pages so the RAG agent can retrieve them.
                    if vector_store.add_documents(documents):
                        st.sidebar.success(f"Document '{uploaded_file.name}' processed and indexed successfully!")
                    else:
                        st.sidebar.error("Failed to index the document.")
                else:
                    st.sidebar.error("Failed to process the document.")

    # --- Sidebar: list of already-available documents ---
    st.sidebar.header("Available Documents")
    documents = doc_loader.get_available_documents()
    st.sidebar.write(", ".join(documents) if documents else "No documents available")

    # --- Chat interface ---
    st.header("Chat Interface")

    # Conversation persists across Streamlit reruns via session_state.
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the conversation so far.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    user_query = st.chat_input("Ask about weather or document information")

    if user_query:
        # Record and echo the user's turn.
        st.session_state.messages.append({"role": "user", "content": user_query})
        with st.chat_message("user"):
            st.write(user_query)

        # Run the LangGraph pipeline (router -> weather/RAG -> evaluate).
        with st.spinner("Thinking..."):
            result = workflow.invoke(user_query)

        # Record and display the assistant's turn.
        st.session_state.messages.append({"role": "assistant", "content": result["response"]})
        with st.chat_message("assistant"):
            st.write(result["response"])

        # Expandable panel with routing decision, retrieval details and metrics.
        with st.expander("Debug Information"):
            st.write(f"Action: {result['action']}")

            if result['action'] == 'weather' and result['city']:
                st.write(f"City: {result['city']}")

            if result['action'] == 'document' and result['context']:
                st.write("Retrieved Context:")
                for i, ctx in enumerate(result['context']):
                    st.write(f"Document {i+1}:")
                    st.write(ctx['page_content'])

            st.write("Evaluation Metrics:")
            st.write(result['evaluation'])
114
+
115
# Launch the Streamlit app when this file is run directly.
if __name__ == "__main__":
    main()
upload/graph/__init__.py ADDED
File without changes
upload/graph/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (160 Bytes). View file
 
upload/graph/__pycache__/workflow.cpython-311.pyc ADDED
Binary file (6.64 kB). View file
 
upload/graph/workflow.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List, Literal, TypedDict, Annotated, Union
2
+ from langchain.schema import Document
3
+ from pydantic import BaseModel, Field
4
+ from langgraph.graph import StateGraph, END
5
+ from agents.router_agent import RouterAgent
6
+ from agents.weather_agent import WeatherAgent
7
+ from agents.rag_agent import RAGAgent
8
+ from utils.evaluation import LangSmithEvaluator
9
+
10
+
11
class WorkflowState(BaseModel):
    """Shared state passed between the nodes of the LangGraph workflow.

    Only the fields relevant to the routed branch are populated: weather
    queries fill 'city'/'weather_data', document queries fill 'context'.
    """
    query: str = Field(description="The user's original query")
    action: str = Field(default="", description="The action to take: 'weather' or 'document'")
    context: List[Dict[str, Any]] = Field(default=[], description="Retrieved context (for document queries)")
    weather_data: Dict[str, Any] = Field(default={}, description="Weather data (for weather queries)")
    city: str = Field(default="", description="City for weather queries")
    response: str = Field(default="", description="The final response to the user")
    evaluation: Dict[str, Any] = Field(default={}, description="Evaluation results")
20
+
21
class LangGraphWorkflow:
    """Wires the router, weather and RAG agents into a compiled LangGraph pipeline."""

    def __init__(self):
        # Agents backing each graph node.
        self.router_agent = RouterAgent()
        self.weather_agent = WeatherAgent()
        self.rag_agent = RAGAgent()
        self.evaluator = LangSmithEvaluator()

        # Compile the graph once; invoke() reuses it for every query.
        self.workflow = self.build_workflow()

    def route(self, state: WorkflowState) -> WorkflowState:
        """Classify the query and record the chosen action on the state."""
        return state.copy(update={"action": self.router_agent.route_query(state.query)})

    def process_weather(self, state: WorkflowState) -> WorkflowState:
        """Handle a weather query via the weather agent."""
        outcome = self.weather_agent.get_weather_response(state.query)
        return state.copy(update={
            "city": outcome["city"],
            "weather_data": outcome["weather_data"],
            "response": outcome["response"],
        })

    def process_document(self, state: WorkflowState) -> WorkflowState:
        """Handle a document query via the RAG agent."""
        outcome = self.rag_agent.get_rag_response(state.query)
        return state.copy(update={
            "context": outcome["context"],
            "response": outcome["response"],
        })

    def evaluate_response(self, state: WorkflowState) -> WorkflowState:
        """Attach simple evaluation metrics to the state.

        NOTE(review): these metrics are stubbed placeholders; a real setup
        would pull them from LangSmith.
        """
        found_evidence = bool(state.context or state.weather_data)
        metrics = {
            "query": state.query,
            "response": state.response,
            "action": state.action,
            "confidence": 0.95 if found_evidence else 0.7,
            "latency": 1.2,  # Example metric
        }
        return state.copy(update={"evaluation": metrics})

    def build_workflow(self) -> StateGraph:
        """Assemble and compile the graph: router -> (weather | document) -> evaluate -> END."""
        graph = StateGraph(WorkflowState)

        graph.add_node("router", self.route)
        graph.add_node("weather", self.process_weather)
        graph.add_node("document", self.process_document)
        graph.add_node("evaluate", self.evaluate_response)

        # Branch on the action the router wrote into the state.
        graph.add_conditional_edges(
            "router",
            lambda state: state.action,
            {"weather": "weather", "document": "document"},
        )

        # Both branches converge on the evaluation step.
        graph.add_edge("weather", "evaluate")
        graph.add_edge("document", "evaluate")
        graph.add_edge("evaluate", END)

        graph.set_entry_point("router")

        return graph.compile()

    def invoke(self, query: str) -> Dict[str, Any]:
        """Run the compiled workflow for *query* and return the final state mapping."""
        return self.workflow.invoke(WorkflowState(query=query))
upload/models/__init__.py ADDED
File without changes
upload/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (161 Bytes). View file
 
upload/models/__pycache__/vector_store.cpython-311.pyc ADDED
Binary file (4.03 kB). View file
 
upload/models/embedding.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Any
2
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
3
+ from langchain.schema import Document
4
+ import os
5
+ from dotenv import load_dotenv
6
+ load_dotenv()
7
+
8
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
9
+
10
+
11
class EmbeddingModel:
    """Thin wrapper around Google's Gemini text-embedding model."""

    def __init__(self, api_key: str = GEMINI_API_KEY):
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004",
            google_api_key=api_key,
        )

    def embed_documents(self, documents: List[Document]) -> List[List[float]]:
        """Embed each document's text; returns one vector per document."""
        return self.embeddings.embed_documents([doc.page_content for doc in documents])

    def embed_query(self, query: str) -> List[float]:
        """Embed a single query string for similarity search."""
        return self.embeddings.embed_query(query)
upload/models/vector_store.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Any, Optional
2
+ from langchain.schema import Document
3
+ from langchain_community.vectorstores import Qdrant
4
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ from qdrant_client import QdrantClient
6
+ from qdrant_client.http import models as rest
7
+ from dotenv import load_dotenv
8
+ import os
9
+
10
# Fix: this module imports load_dotenv but never called it, so the getenv
# reads below returned None unless the variables were already exported.
# Every other module in this project calls load_dotenv() before reading config.
load_dotenv()

QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
db_url = os.getenv("db_url")   # Qdrant host (scheme added at client creation)
db_api = os.getenv("db_api")   # Qdrant API key
14
+
15
+
16
class VectorStore:
    """Interface to the Qdrant vector database backed by Gemini embeddings."""

    def __init__(
        self,
        collection_name: str = QDRANT_COLLECTION_NAME,
        db_url: str = db_url,
        db_api: str = db_api,  # fixed annotation: the Qdrant API key is a string, not an int
        api_key: str = GEMINI_API_KEY
    ):
        """Connect to Qdrant and ensure the collection exists.

        Args:
            collection_name: Name of the Qdrant collection to use.
            db_url: Qdrant host; "https://" is prepended below.
            db_api: Qdrant API key.
            api_key: Google Gemini API key for the embedding model.
        """
        self.collection_name = collection_name
        self.embeddings = GoogleGenerativeAIEmbeddings(
            google_api_key=api_key,
            model="models/text-embedding-004"
        )

        # Initialize Qdrant client
        self.client = QdrantClient(url=f"https://{db_url}", api_key=db_api)

        # Create collection if it doesn't exist
        collections = self.client.get_collections().collections
        collection_names = [collection.name for collection in collections]

        if collection_name not in collection_names:
            self.client.create_collection(
                collection_name=collection_name,
                vectors_config=rest.VectorParams(
                    size=768,  # Gemini embedding dimension
                    distance=rest.Distance.COSINE
                )
            )

        # LangChain wrapper around the raw client/collection.
        self.vectorstore = Qdrant(
            client=self.client,
            collection_name=collection_name,
            embeddings=self.embeddings
        )

    def add_documents(self, documents: List[Document]) -> bool:
        """Index *documents*; returns True on success, False on any error.

        Errors are reported to stdout rather than raised so the UI can show
        a friendly failure message.
        """
        try:
            self.vectorstore.add_documents(documents)
            return True
        except Exception as e:
            print(f"Error adding documents to vector store: {str(e)}")
            return False

    def similarity_search(self, query: str, k: int = 4) -> List[Document]:
        """Return up to *k* documents similar to *query*; empty list on error."""
        try:
            return self.vectorstore.similarity_search(query, k=k)
        except Exception as e:
            print(f"Error during similarity search: {str(e)}")
            return []
72
+
upload/tests/__init__.py ADDED
File without changes
upload/tests/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (160 Bytes). View file
 
upload/tests/__pycache__/test_api_handler.cpython-311-pytest-8.3.5.pyc ADDED
Binary file (4.51 kB). View file
 
upload/tests/__pycache__/test_rag_agent.cpython-311-pytest-8.3.5.pyc ADDED
Binary file (4.4 kB). View file
 
upload/tests/__pycache__/test_workflow.cpython-311-pytest-8.3.5.pyc ADDED
Binary file (4.65 kB). View file
 
upload/tests/test_api_handler.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import json
4
+ from utils.api_handler import WeatherAPIHandler
5
+ from requests.exceptions import RequestException
6
+ from requests.exceptions import HTTPError
7
+
8
class TestWeatherAPIHandler(unittest.TestCase):
    """Unit tests for WeatherAPIHandler with requests.get mocked out."""

    def setUp(self):
        # Handler under test; the key is fake because no real HTTP call is made.
        self.api_handler = WeatherAPIHandler(api_key="test_api_key")

        # Sample successful response data (shape mirrors the OpenWeatherMap payload).
        self.sample_response = {
            "name": "London",
            "sys": {"country": "GB"},
            "main": {
                "temp": 15.5,
                "feels_like": 14.8,
                "humidity": 76
            },
            "weather": [{"description": "scattered clouds"}],
            "wind": {"speed": 3.6}
        }

    @patch('requests.get')
    def test_get_weather_success(self, mock_get):
        # Configure mock: 200 response whose .json() returns the sample payload.
        mock_response = MagicMock()
        mock_response.status_code = 200
        mock_response.json.return_value = self.sample_response
        mock_get.return_value = mock_response

        # Call the method
        result = self.api_handler.get_weather("London")

        # Assertions
        self.assertEqual(result, self.sample_response)
        mock_get.assert_called_once()

    @patch('requests.get')
    def test_get_weather_city_not_found(self, mock_get):
        # raise_for_status raises an HTTPError carrying the 404 response.
        # NOTE(review): MagicMock is always truthy, so this passes even though a
        # real requests.Response with status 404 is falsy (Response.__bool__
        # returns self.ok) — the handler's `if e.response` check would then skip
        # the 404 branch in production. Verify against the handler code.
        mock_response = MagicMock()
        mock_response.status_code = 404
        mock_response.raise_for_status.side_effect = HTTPError(response=mock_response)
        mock_get.return_value = mock_response

        result = self.api_handler.get_weather("NonExistentCity")

        self.assertIn("error", result)
        self.assertIn("NonExistentCity", result["error"])


    @patch('requests.get')
    def test_get_weather_connection_error(self, mock_get):
        # Network failure before any response is received.
        mock_get.side_effect = RequestException("Connection Error")

        result = self.api_handler.get_weather("London")

        self.assertIn("error", result)
        self.assertIn("Connection Error", result["error"])

    def test_format_weather_data_success(self):
        # Call the method with the full sample payload.
        formatted_result = self.api_handler.format_weather_data(self.sample_response)

        # Assertions: key fields appear in the human-readable summary.
        self.assertIn("London", formatted_result)
        self.assertIn("15.5°C", formatted_result)
        self.assertIn("scattered clouds", formatted_result.lower())

    def test_format_weather_data_error(self):
        # Call the method with incomplete data: an error dict is passed through verbatim.
        formatted_result = self.api_handler.format_weather_data({"error": "City not found"})

        # Assertions
        self.assertEqual(formatted_result, "City not found")
78
+
79
+
upload/tests/test_rag_agent.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ from agents.rag_agent import RAGAgent
4
+ from langchain.schema import Document
5
+
6
class TestRAGAgent(unittest.TestCase):
    """Unit tests for RAGAgent with the vector store and LLM mocked out."""

    def setUp(self):
        # Create a mock for vector store (patched where rag_agent imported it).
        self.vector_store_patch = patch('agents.rag_agent.VectorStore')
        self.mock_vector_store_class = self.vector_store_patch.start()
        self.mock_vector_store = self.mock_vector_store_class.return_value

        # Create a mock for LLM so no real Gemini calls are made.
        self.llm_patch = patch('agents.rag_agent.ChatGoogleGenerativeAI')
        self.mock_llm_class = self.llm_patch.start()
        self.mock_llm = self.mock_llm_class.return_value

        # Sample documents returned by the mocked similarity search.
        self.sample_docs = [
            Document(page_content="This is a test document about AI.", metadata={"source": "test1.pdf"}),
            Document(page_content="LangChain is a framework for LLM applications.", metadata={"source": "test2.pdf"})
        ]

        # Initialize agent (after patching, so it picks up the mocks).
        self.agent = RAGAgent(api_key="test_api_key")

    def tearDown(self):
        # Stop patches so other tests see the real classes.
        self.vector_store_patch.stop()
        self.llm_patch.stop()

    def test_retrieve_context(self):
        # Configure mock
        self.mock_vector_store.similarity_search.return_value = self.sample_docs

        # Call the method
        result = self.agent.retrieve_context("What is LangChain?")

        # Assertions: documents pass through untouched, search called exactly once.
        self.assertEqual(result, self.sample_docs)
        self.mock_vector_store.similarity_search.assert_called_once()

    def test_get_rag_response_with_context(self):
        # Mock similarity_search to return 2 documents
        self.mock_vector_store.similarity_search.return_value = self.sample_docs

        # Mock rag_chain so invoke() returns a canned LLM answer.
        mock_chain = MagicMock()
        mock_chain.invoke.return_value.content = "LangChain is a framework for building LLM applications."
        self.agent.rag_chain = mock_chain

        # Call the method
        result = self.agent.get_rag_response("What is LangChain?")

        # Assertions: answer text and serialized context both surface in the result.
        self.assertEqual(result["response"], "LangChain is a framework for building LLM applications.")
        self.assertEqual(len(result["context"]), 2)
        self.assertEqual(result["context"][0]["page_content"], "This is a test document about AI.")


    def test_get_rag_response_no_context(self):
        # Configure mock to return empty list (no relevant documents found).
        self.mock_vector_store.similarity_search.return_value = []

        # Call the method
        result = self.agent.get_rag_response("What is LangChain?")

        # Assertions: empty context triggers the fixed fallback message.
        self.assertEqual(len(result["context"]), 0)
        self.assertIn("couldn't find any relevant information", result["response"])
71
+
upload/tests/test_workflow.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ from graph.workflow import LangGraphWorkflow, WorkflowState
4
+
5
class TestLangGraphWorkflow(unittest.TestCase):
    """Unit tests for the individual LangGraphWorkflow node methods, with all agents mocked."""

    def setUp(self):
        # Create mocks for agents (patched at the workflow module's import sites).
        self.router_agent_patch = patch('graph.workflow.RouterAgent')
        self.weather_agent_patch = patch('graph.workflow.WeatherAgent')
        self.rag_agent_patch = patch('graph.workflow.RAGAgent')
        self.evaluator_patch = patch('graph.workflow.LangSmithEvaluator')

        self.mock_router_agent_class = self.router_agent_patch.start()
        self.mock_weather_agent_class = self.weather_agent_patch.start()
        self.mock_rag_agent_class = self.rag_agent_patch.start()
        self.mock_evaluator_class = self.evaluator_patch.start()

        # Instances the workflow will actually hold after construction.
        self.mock_router_agent = self.mock_router_agent_class.return_value
        self.mock_weather_agent = self.mock_weather_agent_class.return_value
        self.mock_rag_agent = self.mock_rag_agent_class.return_value
        self.mock_evaluator = self.mock_evaluator_class.return_value

        # Initialize workflow (after patching, so it picks up the mocks).
        self.workflow = LangGraphWorkflow()

    def tearDown(self):
        # Stop all patches so other tests see the real classes.
        self.router_agent_patch.stop()
        self.weather_agent_patch.stop()
        self.rag_agent_patch.stop()
        self.evaluator_patch.stop()

    def test_route_to_weather(self):
        # Configure mock: router classifies the query as a weather request.
        self.mock_router_agent.route_query.return_value = "weather"

        # Create state
        state = WorkflowState(query="What's the weather in London?")

        # Call the method
        result = self.workflow.route(state)

        # Assertions: action recorded on state; router saw the raw query.
        self.assertEqual(result.action, "weather")
        self.mock_router_agent.route_query.assert_called_once_with("What's the weather in London?")

    def test_route_to_document(self):
        # Configure mock: router classifies the query as a document request.
        self.mock_router_agent.route_query.return_value = "document"

        # Create state
        state = WorkflowState(query="What is LangChain?")

        # Call the method
        result = self.workflow.route(state)

        # Assertions
        self.assertEqual(result.action, "document")
        self.mock_router_agent.route_query.assert_called_once_with("What is LangChain?")

    def test_process_weather(self):
        # Configure mock: canned weather-agent result dict.
        self.mock_weather_agent.get_weather_response.return_value = {
            "city": "London",
            "weather_data": {"temp": 15.5},
            "response": "The weather in London is 15.5°C."
        }

        # Create state
        state = WorkflowState(query="What's the weather in London?", action="weather")

        # Call the method
        result = self.workflow.process_weather(state)

        # Assertions: all three fields are copied onto the state.
        self.assertEqual(result.city, "London")
        self.assertEqual(result.weather_data, {"temp": 15.5})
        self.assertEqual(result.response, "The weather in London is 15.5°C.")
upload/utils/__init__.py ADDED
File without changes
upload/utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (160 Bytes). View file
 
upload/utils/__pycache__/api_handler.cpython-311.pyc ADDED
Binary file (4.01 kB). View file
 
upload/utils/__pycache__/document_loader.cpython-311.pyc ADDED
Binary file (4.25 kB). View file
 
upload/utils/__pycache__/evaluation.cpython-311.pyc ADDED
Binary file (3.24 kB). View file
 
upload/utils/api_handler.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from typing import Dict, Any, Optional
3
+ import json
4
+ import requests
5
+ import os
6
+ from dotenv import load_dotenv
7
+ load_dotenv()
8
+
9
+ OPENWEATHERMAP_API_KEY = os.getenv("OPENWEATHERMAP_API_KEY")
10
+ WEATHER_API_BASE_URL = "https://api.openweathermap.org/data/2.5/weather"
11
+
12
class WeatherAPIHandler:
    """Handler for the OpenWeatherMap current-weather API.

    Wraps the /data/2.5/weather endpoint: fetches the raw JSON payload for a
    city and formats it into a human-readable summary. Failures are reported
    as ``{"error": ...}`` dicts (or an error string from the formatter) rather
    than raised, so callers never need try/except around these methods.
    """

    # Held as a class attribute (same value as the module-level
    # WEATHER_API_BASE_URL) so instances are self-contained.
    DEFAULT_BASE_URL = "https://api.openweathermap.org/data/2.5/weather"

    def __init__(self, api_key: Optional[str] = None):
        """Create a handler.

        Args:
            api_key: OpenWeatherMap API key. When omitted (or None), falls
                back to the OPENWEATHERMAP_API_KEY environment value —
                backward compatible with the previous def-time default, but
                resolved per instance instead of once at import.
        """
        self.api_key = api_key if api_key is not None else OPENWEATHERMAP_API_KEY
        self.base_url = self.DEFAULT_BASE_URL

    def get_weather(self, city: str, timeout: float = 10.0) -> Dict[str, Any]:
        """Fetch current weather data for *city*.

        Args:
            city: City name (optionally "City,CC" with a country code).
            timeout: Seconds to wait for the HTTP response. New defaulted
                parameter — the original call had no timeout and could hang
                indefinitely on an unresponsive server.

        Returns:
            The parsed OpenWeatherMap JSON on success, otherwise a dict with
            a single "error" key describing the failure.
        """
        params = {
            'q': city,
            'appid': self.api_key,
            'units': 'metric'
        }

        try:
            response = requests.get(self.base_url, params=params, timeout=timeout)
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            # BUG FIX: must compare against None explicitly. Response.__bool__
            # is False for any non-2xx status, so the original
            # `if e.response` made the 404 branch unreachable.
            status_code = e.response.status_code if e.response is not None else None
            if status_code == 404:
                return {"error": f"City {city} not found"}
            return {"error": f"HTTP Error: {str(e)}"}
        except requests.exceptions.RequestException as e:
            return {"error": f"Request Error: {str(e)}"}

        try:
            return response.json()
        except ValueError:
            # BUG FIX: parse errors are handled separately from transport
            # errors. In modern requests, response.json() raises
            # requests.exceptions.JSONDecodeError, which subclasses both
            # RequestException and json.JSONDecodeError — the original
            # trailing `except json.JSONDecodeError` was therefore
            # unreachable (RequestException caught it first).
            return {"error": "Failed to parse API response"}

    def format_weather_data(self, weather_data: Dict[str, Any]) -> str:
        """Format an OpenWeatherMap payload into a readable multi-line string.

        Error dicts produced by get_weather() pass their message straight
        through; incomplete payloads yield a generic error string.
        """
        if "error" in weather_data:
            return weather_data["error"]

        try:
            city = weather_data["name"]
            country = weather_data["sys"]["country"]
            temp = weather_data["main"]["temp"]
            feels_like = weather_data["main"]["feels_like"]
            humidity = weather_data["main"]["humidity"]
            weather_desc = weather_data["weather"][0]["description"]
            wind_speed = weather_data["wind"]["speed"]

            formatted_result = f"""
            Weather in {city}, {country}:
            - Temperature: {temp}°C (Feels like: {feels_like}°C)
            - Conditions: {weather_desc.capitalize()}
            - Humidity: {humidity}%
            - Wind Speed: {wind_speed} m/s
            """
            return formatted_result
        except KeyError:
            return "Error formatting weather data: incomplete or invalid data received"
upload/utils/document_loader.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict, Any
3
+ import tempfile
4
+ from pathlib import Path
5
+
6
+ from langchain_community.document_loaders import PyPDFLoader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.schema import Document
9
+
10
+
11
+
12
class DocumentLoader:
    """Loads PDF files from a local directory and splits them into
    retrieval-sized chunks."""

    def __init__(self, document_dir: str = "documents"):
        """Set up the chunker and make sure *document_dir* exists."""
        self.document_dir = document_dir
        # 1000-character chunks with 200 characters of overlap.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        os.makedirs(document_dir, exist_ok=True)

    def load_pdf(self, file_path: str) -> List[Document]:
        """Load a PDF and return its chunked pages; [] on any failure."""
        try:
            pages = PyPDFLoader(file_path).load()
            return self.text_splitter.split_documents(pages)
        except Exception as e:
            print(f"Error loading PDF: {str(e)}")
            return []

    def save_uploaded_pdf(self, uploaded_file) -> str:
        """Persist an uploaded PDF under its (sanitised) original filename.

        Returns the saved path, or "" when the write fails.
        """
        try:
            os.makedirs(self.document_dir, exist_ok=True)
            # basename() drops any directory components a crafted filename
            # might carry, blocking path traversal.
            safe_filename = os.path.basename(uploaded_file.name)
            destination = os.path.join(self.document_dir, safe_filename)
            with open(destination, 'wb') as f:
                f.write(uploaded_file.getvalue())
            return destination
        except Exception as e:
            print(f"Error saving uploaded PDF: {str(e)}")
            return ""

    def get_available_documents(self) -> List[str]:
        """Return the names of the PDF files currently in the directory."""
        try:
            return [name for name in os.listdir(self.document_dir)
                    if name.endswith('.pdf')]
        except Exception as e:
            print(f"Error listing documents: {str(e)}")
            return []
upload/utils/evaluation.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+ from langsmith import Client
3
+ from langchain.smith import RunEvalConfig
4
+ from langsmith.evaluation import run_evaluator
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ import os
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+ LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
11
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
12
+
13
class LangSmithEvaluator:
    """Handles evaluation using LangSmith.

    Creates a LangSmith dataset per call, attaches the (query, response)
    pair as an example, and runs a configured set of evaluators with a
    Gemini model acting as judge. All failures are returned as
    {"error": ...} rather than raised.
    """

    def __init__(self, api_key: str = LANGSMITH_API_KEY):
        # LangSmith client used for dataset/example creation and evaluation.
        self.client = Client(api_key=api_key)
        # Gemini model used as the judge LLM for the custom evaluators below.
        self.evaluator_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash",google_api_key=GEMINI_API_KEY)

    def evaluate_response(self, query: str, response: str, reference: str = None) -> Dict[str, Any]:
        """Evaluate an LLM response against a query and optional reference.

        NOTE(review): `reference` is accepted but never used below — confirm
        whether it should be stored in the example outputs as ground truth.
        """
        eval_config = RunEvalConfig(
            evaluators=[
                "criteria",
                "embedding_distance",
            ],
            custom_evaluators=[
                # NOTE(review): `run_evaluator` (imported above) is the
                # langsmith decorator function and has no `RunEvalConfig`
                # attribute — this lookup raises AttributeError *before* the
                # try below, so it is not caught. Likely the intent was
                # something like RunEvalConfig.Criteria("correctness") with
                # eval_llm=self.evaluator_llm; verify against the
                # langchain.smith API before fixing.
                run_evaluator.RunEvalConfig(
                    evaluator="correctness",
                    llm=self.evaluator_llm
                ),
                run_evaluator.RunEvalConfig(
                    evaluator="helpfulness",
                    llm=self.evaluator_llm
                ),
                run_evaluator.RunEvalConfig(
                    evaluator="relevance",
                    llm=self.evaluator_llm
                ),
            ]
        )

        try:
            # Create dataset with single example.
            # NOTE(review): the dataset name is fixed, and LangSmith rejects
            # duplicate dataset names — a second call will fail here (and be
            # swallowed into the error dict). Consider a unique name per run.
            dataset = self.client.create_dataset(
                "evaluation_dataset",
                description="Dataset for evaluation of LLM responses"
            )

            # Add the single (question, answer) example to evaluate.
            self.client.create_example(
                inputs={"question": query},
                outputs={"answer": response},
                dataset_id=dataset.id
            )

            # Run evaluation.
            # NOTE(review): recent langsmith Client versions expose
            # run_on_dataset()/evaluate(), not run_evaluation() — confirm the
            # installed langsmith version actually provides this method.
            evaluation_results = self.client.run_evaluation(
                dataset_id=dataset.id,
                config=eval_config
            )

            return evaluation_results
        except Exception as e:
            # Best-effort: report any evaluation failure instead of raising.
            print(f"Error during evaluation: {str(e)}")
            return {"error": str(e)}