AmritSbisht committed on
Commit
f974658
·
verified ·
1 Parent(s): 09ca56f

Upload 25 files

Browse files
graph/__init__.py ADDED
File without changes
graph/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (160 Bytes). View file
 
graph/__pycache__/workflow.cpython-311.pyc ADDED
Binary file (6.64 kB). View file
 
graph/workflow.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List, Literal, TypedDict, Annotated, Union
2
+ from langchain.schema import Document
3
+ from pydantic import BaseModel, Field
4
+ from langgraph.graph import StateGraph, END
5
+ from agents.router_agent import RouterAgent
6
+ from agents.weather_agent import WeatherAgent
7
+ from agents.rag_agent import RAGAgent
8
+ from utils.evaluation import LangSmithEvaluator
9
+
10
+
11
class WorkflowState(BaseModel):
    """State carried between nodes of the LangGraph workflow.

    Each node receives the current state and returns an updated copy; the
    fields start empty and are filled in as the query flows through
    routing, processing, and evaluation.
    """

    # The user's original query (required, no default).
    query: str = Field(description="The user's original query")
    # Routing decision produced by the router node.
    action: str = Field(description="The action to take: 'weather' or 'document'", default="")
    # Retrieved context documents (document/RAG path only).
    # default_factory gives each instance its own list instead of a
    # shared mutable default.
    context: List[Dict[str, Any]] = Field(description="Retrieved context (for document queries)", default_factory=list)
    # Raw weather payload (weather path only); fresh dict per instance.
    weather_data: Dict[str, Any] = Field(description="Weather data (for weather queries)", default_factory=dict)
    # City extracted for weather queries.
    city: str = Field(description="City for weather queries", default="")
    # Final natural-language answer returned to the user.
    response: str = Field(description="The final response to the user", default="")
    # Metrics recorded by the evaluation node; fresh dict per instance.
    evaluation: Dict[str, Any] = Field(description="Evaluation results", default_factory=dict)
21
class LangGraphWorkflow:
    """LangGraph workflow for the AI pipeline"""

    def __init__(self):
        # Agents and evaluator backing the individual graph nodes.
        self.router_agent = RouterAgent()
        self.weather_agent = WeatherAgent()
        self.rag_agent = RAGAgent()
        self.evaluator = LangSmithEvaluator()

        # Compile the graph once at construction time.
        self.workflow = self.build_workflow()

    def route(self, state: WorkflowState) -> WorkflowState:
        """Route the query to the appropriate agent"""
        chosen = self.router_agent.route_query(state.query)
        return state.copy(update={"action": chosen})

    def process_weather(self, state: WorkflowState) -> WorkflowState:
        """Process weather-related queries"""
        result = self.weather_agent.get_weather_response(state.query)
        updates = {
            "city": result["city"],
            "weather_data": result["weather_data"],
            "response": result["response"],
        }
        return state.copy(update=updates)

    def process_document(self, state: WorkflowState) -> WorkflowState:
        """Process document-related queries"""
        result = self.rag_agent.get_rag_response(state.query)
        return state.copy(
            update={"context": result["context"], "response": result["response"]}
        )

    def evaluate_response(self, state: WorkflowState) -> WorkflowState:
        """Evaluate the response using LangSmith"""
        # Simple heuristic: confidence is higher when some grounding
        # (retrieved context or weather data) was actually collected.
        grounded = bool(state.context or state.weather_data)
        metrics = {
            "query": state.query,
            "response": state.response,
            "action": state.action,
            # Additional metrics would come from LangSmith in a real implementation
            "confidence": 0.95 if grounded else 0.7,
            "latency": 1.2,  # Example metric
        }
        return state.copy(update={"evaluation": metrics})

    def build_workflow(self) -> StateGraph:
        """Build the LangGraph workflow"""
        graph = StateGraph(WorkflowState)

        # Register each node under its name, backed by the bound method.
        for node_name, handler in (
            ("router", self.route),
            ("weather", self.process_weather),
            ("document", self.process_document),
            ("evaluate", self.evaluate_response),
        ):
            graph.add_node(node_name, handler)

        # Fan out from the router according to state.action.
        graph.add_conditional_edges(
            "router",
            lambda state: state.action,
            {"weather": "weather", "document": "document"},
        )

        # Both branches converge on evaluation, which terminates the run.
        graph.add_edge("weather", "evaluate")
        graph.add_edge("document", "evaluate")
        graph.add_edge("evaluate", END)

        graph.set_entry_point("router")

        return graph.compile()

    def invoke(self, query: str) -> Dict[str, Any]:
        """Invoke the workflow with a query"""
        initial_state = WorkflowState(query=query)
        return self.workflow.invoke(initial_state)
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (161 Bytes). View file
 
models/__pycache__/vector_store.cpython-311.pyc ADDED
Binary file (4.03 kB). View file
 
models/embedding.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Dict, Any
2
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
3
+ from langchain.schema import Document
4
+ import os
5
+ from dotenv import load_dotenv
6
+ load_dotenv()
7
+
8
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
9
+
10
+
11
class EmbeddingModel:
    """Handles document embedding using Google's Gemini embedding models"""

    def __init__(self, api_key: str = GEMINI_API_KEY):
        # One shared embedding client serves both documents and queries.
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004",
            google_api_key=api_key,
        )

    def embed_documents(self, documents: List[Document]) -> List[List[float]]:
        """Generate embeddings for a list of documents"""
        # Embed only the text content; metadata is not part of the vector.
        return self.embeddings.embed_documents(
            [doc.page_content for doc in documents]
        )

    def embed_query(self, query: str) -> List[float]:
        """Generate embedding for a query string"""
        return self.embeddings.embed_query(query)
models/vector_store.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import List, Dict, Any, Optional
from langchain.schema import Document
from langchain_community.vectorstores import Qdrant
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.http import models as rest
from dotenv import load_dotenv
import os

# Load variables from .env BEFORE reading them. The original module
# imported load_dotenv but never called it, so every os.getenv lookup
# below could silently return None (the sibling modules in this project
# do call load_dotenv()).
load_dotenv()

QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
db_url = os.getenv("db_url")
db_api = os.getenv("db_api")
16
class VectorStore:
    """Interface to the Qdrant vector database.

    Connects to a hosted Qdrant instance, creates the target collection on
    first use, and exposes add/search operations backed by Gemini
    embeddings.
    """

    def __init__(
        self,
        collection_name: str = QDRANT_COLLECTION_NAME,
        db_url: str = db_url,
        # Qdrant API key — the original annotation said `int`, but this is
        # passed as QdrantClient(api_key=...), which takes a string.
        db_api: str = db_api,
        api_key: str = GEMINI_API_KEY
    ):
        self.collection_name = collection_name
        self.embeddings = GoogleGenerativeAIEmbeddings(
            google_api_key=api_key,
            model="models/text-embedding-004"
        )

        # Initialize Qdrant client (db_url is the bare host; scheme added here)
        self.client = QdrantClient(url=f"https://{db_url}", api_key=db_api)

        # Create collection if it doesn't exist
        collections = self.client.get_collections().collections
        collection_names = [collection.name for collection in collections]

        if collection_name not in collection_names:
            self.client.create_collection(
                collection_name=collection_name,
                vectors_config=rest.VectorParams(
                    size=768,  # Gemini embedding dimension
                    distance=rest.Distance.COSINE
                )
            )

        # Initialize Qdrant vectorstore
        self.vectorstore = Qdrant(
            client=self.client,
            collection_name=collection_name,
            embeddings=self.embeddings
        )

    def add_documents(self, documents: List[Document]) -> bool:
        """Add documents to the vector store.

        Returns True on success, False on any failure (error is printed,
        not raised, to keep the ingestion pipeline best-effort).
        """
        try:
            self.vectorstore.add_documents(documents)
            return True
        except Exception as e:
            print(f"Error adding documents to vector store: {str(e)}")
            return False

    def similarity_search(self, query: str, k: int = 4) -> List[Document]:
        """Return the k most similar documents for a query.

        Returns an empty list on failure rather than raising.
        """
        try:
            return self.vectorstore.similarity_search(query, k=k)
        except Exception as e:
            print(f"Error during similarity search: {str(e)}")
            return []
+
tests/__init__.py ADDED
File without changes
tests/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (160 Bytes). View file
 
tests/__pycache__/test_api_handler.cpython-311-pytest-8.3.5.pyc ADDED
Binary file (4.51 kB). View file
 
tests/__pycache__/test_rag_agent.cpython-311-pytest-8.3.5.pyc ADDED
Binary file (4.4 kB). View file
 
tests/__pycache__/test_workflow.cpython-311-pytest-8.3.5.pyc ADDED
Binary file (4.65 kB). View file
 
tests/test_api_handler.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ import json
4
+ from utils.api_handler import WeatherAPIHandler
5
+ from requests.exceptions import RequestException
6
+ from requests.exceptions import HTTPError
7
+
8
class TestWeatherAPIHandler(unittest.TestCase):
    """Unit tests for WeatherAPIHandler with mocked HTTP requests."""

    def setUp(self):
        self.api_handler = WeatherAPIHandler(api_key="test_api_key")

        # Canned OpenWeatherMap-style payload reused across tests.
        self.sample_response = {
            "name": "London",
            "sys": {"country": "GB"},
            "main": {"temp": 15.5, "feels_like": 14.8, "humidity": 76},
            "weather": [{"description": "scattered clouds"}],
            "wind": {"speed": 3.6},
        }

    @patch('requests.get')
    def test_get_weather_success(self, mock_get):
        # A 200 response whose JSON body is the sample payload.
        fake_response = MagicMock()
        fake_response.status_code = 200
        fake_response.json.return_value = self.sample_response
        mock_get.return_value = fake_response

        result = self.api_handler.get_weather("London")

        self.assertEqual(result, self.sample_response)
        mock_get.assert_called_once()

    @patch('requests.get')
    def test_get_weather_city_not_found(self, mock_get):
        # Simulate a 404 surfaced via raise_for_status().
        fake_response = MagicMock()
        fake_response.status_code = 404
        fake_response.raise_for_status.side_effect = HTTPError(response=fake_response)
        mock_get.return_value = fake_response

        result = self.api_handler.get_weather("NonExistentCity")

        self.assertIn("error", result)
        self.assertIn("NonExistentCity", result["error"])

    @patch('requests.get')
    def test_get_weather_connection_error(self, mock_get):
        # Network-level failure before any response exists.
        mock_get.side_effect = RequestException("Connection Error")

        result = self.api_handler.get_weather("London")

        self.assertIn("error", result)
        self.assertIn("Connection Error", result["error"])

    def test_format_weather_data_success(self):
        formatted = self.api_handler.format_weather_data(self.sample_response)

        self.assertIn("London", formatted)
        self.assertIn("15.5°C", formatted)
        self.assertIn("scattered clouds", formatted.lower())

    def test_format_weather_data_error(self):
        # An error payload is returned verbatim as the message.
        formatted = self.api_handler.format_weather_data({"error": "City not found"})

        self.assertEqual(formatted, "City not found")
+
tests/test_rag_agent.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ from agents.rag_agent import RAGAgent
4
+ from langchain.schema import Document
5
+
6
class TestRAGAgent(unittest.TestCase):
    """Unit tests for RAGAgent with the vector store and LLM patched out."""

    def setUp(self):
        # Patch the vector store so no Qdrant connection is attempted.
        self.vector_store_patch = patch('agents.rag_agent.VectorStore')
        self.mock_vector_store_class = self.vector_store_patch.start()
        self.mock_vector_store = self.mock_vector_store_class.return_value

        # Patch the LLM so no Gemini API call is made.
        self.llm_patch = patch('agents.rag_agent.ChatGoogleGenerativeAI')
        self.mock_llm_class = self.llm_patch.start()
        self.mock_llm = self.mock_llm_class.return_value

        # Fixture documents returned by the mocked similarity search.
        self.sample_docs = [
            Document(page_content="This is a test document about AI.", metadata={"source": "test1.pdf"}),
            Document(page_content="LangChain is a framework for LLM applications.", metadata={"source": "test2.pdf"})
        ]

        # Agent under test (constructed after the patches are active).
        self.agent = RAGAgent(api_key="test_api_key")

    def tearDown(self):
        self.vector_store_patch.stop()
        self.llm_patch.stop()

    def test_retrieve_context(self):
        self.mock_vector_store.similarity_search.return_value = self.sample_docs

        retrieved = self.agent.retrieve_context("What is LangChain?")

        self.assertEqual(retrieved, self.sample_docs)
        self.mock_vector_store.similarity_search.assert_called_once()

    def test_get_rag_response_with_context(self):
        # Two documents are found, so the RAG chain should be invoked.
        self.mock_vector_store.similarity_search.return_value = self.sample_docs

        fake_chain = MagicMock()
        fake_chain.invoke.return_value.content = "LangChain is a framework for building LLM applications."
        self.agent.rag_chain = fake_chain

        result = self.agent.get_rag_response("What is LangChain?")

        self.assertEqual(result["response"], "LangChain is a framework for building LLM applications.")
        self.assertEqual(len(result["context"]), 2)
        self.assertEqual(result["context"][0]["page_content"], "This is a test document about AI.")

    def test_get_rag_response_no_context(self):
        # No documents found: the agent should report that explicitly.
        self.mock_vector_store.similarity_search.return_value = []

        result = self.agent.get_rag_response("What is LangChain?")

        self.assertEqual(len(result["context"]), 0)
        self.assertIn("couldn't find any relevant information", result["response"])
+
tests/test_workflow.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from unittest.mock import patch, MagicMock
3
+ from graph.workflow import LangGraphWorkflow, WorkflowState
4
+
5
class TestLangGraphWorkflow(unittest.TestCase):
    """Unit tests for LangGraphWorkflow with all agents patched out."""

    def setUp(self):
        # Patch every collaborator so the workflow wires up mocks only.
        self.router_agent_patch = patch('graph.workflow.RouterAgent')
        self.weather_agent_patch = patch('graph.workflow.WeatherAgent')
        self.rag_agent_patch = patch('graph.workflow.RAGAgent')
        self.evaluator_patch = patch('graph.workflow.LangSmithEvaluator')

        self.mock_router_agent_class = self.router_agent_patch.start()
        self.mock_weather_agent_class = self.weather_agent_patch.start()
        self.mock_rag_agent_class = self.rag_agent_patch.start()
        self.mock_evaluator_class = self.evaluator_patch.start()

        # Instances the workflow will actually hold.
        self.mock_router_agent = self.mock_router_agent_class.return_value
        self.mock_weather_agent = self.mock_weather_agent_class.return_value
        self.mock_rag_agent = self.mock_rag_agent_class.return_value
        self.mock_evaluator = self.mock_evaluator_class.return_value

        # Workflow under test (constructed after the patches are active).
        self.workflow = LangGraphWorkflow()

    def tearDown(self):
        self.router_agent_patch.stop()
        self.weather_agent_patch.stop()
        self.rag_agent_patch.stop()
        self.evaluator_patch.stop()

    def test_route_to_weather(self):
        self.mock_router_agent.route_query.return_value = "weather"

        initial = WorkflowState(query="What's the weather in London?")
        routed = self.workflow.route(initial)

        self.assertEqual(routed.action, "weather")
        self.mock_router_agent.route_query.assert_called_once_with("What's the weather in London?")

    def test_route_to_document(self):
        self.mock_router_agent.route_query.return_value = "document"

        initial = WorkflowState(query="What is LangChain?")
        routed = self.workflow.route(initial)

        self.assertEqual(routed.action, "document")
        self.mock_router_agent.route_query.assert_called_once_with("What is LangChain?")

    def test_process_weather(self):
        self.mock_weather_agent.get_weather_response.return_value = {
            "city": "London",
            "weather_data": {"temp": 15.5},
            "response": "The weather in London is 15.5°C."
        }

        initial = WorkflowState(query="What's the weather in London?", action="weather")
        processed = self.workflow.process_weather(initial)

        self.assertEqual(processed.city, "London")
        self.assertEqual(processed.weather_data, {"temp": 15.5})
        self.assertEqual(processed.response, "The weather in London is 15.5°C.")
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (160 Bytes). View file
 
utils/__pycache__/api_handler.cpython-311.pyc ADDED
Binary file (4.01 kB). View file
 
utils/__pycache__/document_loader.cpython-311.pyc ADDED
Binary file (4.25 kB). View file
 
utils/__pycache__/evaluation.cpython-311.pyc ADDED
Binary file (3.24 kB). View file
 
utils/api_handler.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from typing import Dict, Any, Optional
3
+ import json
4
+ import requests
5
+ import os
6
+ from dotenv import load_dotenv
7
+ load_dotenv()
8
+
9
+ OPENWEATHERMAP_API_KEY = os.getenv("OPENWEATHERMAP_API_KEY")
10
+ WEATHER_API_BASE_URL = "https://api.openweathermap.org/data/2.5/weather"
11
+
12
class WeatherAPIHandler:
    """Handler for the OpenWeatherMap current-weather API.

    Wraps the /weather endpoint. Network and HTTP failures are reported as
    ``{"error": ...}`` dicts rather than raised, so callers can always
    treat the return value as data.
    """

    def __init__(self, api_key: str = OPENWEATHERMAP_API_KEY):
        self.api_key = api_key
        self.base_url = WEATHER_API_BASE_URL

    def get_weather(self, city: str) -> Dict[str, Any]:
        """Fetch current weather for *city* in metric units.

        Returns the parsed JSON payload on success, or a dict with an
        "error" key describing the failure.
        """
        params = {
            'q': city,
            'appid': self.api_key,
            'units': 'metric'
        }

        try:
            # A timeout prevents the request from hanging indefinitely if
            # the API is unreachable (the original call had no timeout).
            response = requests.get(self.base_url, params=params, timeout=10)
            response.raise_for_status()
            return response.json()

        except requests.exceptions.HTTPError as e:
            # Must compare against None explicitly: requests.Response is
            # falsy for 4xx/5xx statuses, so `if e.response` would skip
            # the 404 branch for every real error response.
            status_code = e.response.status_code if e.response is not None else None
            if status_code == 404:
                return {"error": f"City {city} not found"}
            return {"error": f"HTTP Error: {str(e)}"}

        except requests.exceptions.RequestException as e:
            return {"error": f"Request Error: {str(e)}"}

        except json.JSONDecodeError:
            return {"error": "Failed to parse API response"}

    def format_weather_data(self, weather_data: Dict[str, Any]) -> str:
        """Format weather data into a readable string.

        Error payloads are returned verbatim; incomplete payloads yield a
        fixed error message instead of raising KeyError.
        """
        if "error" in weather_data:
            return weather_data["error"]

        try:
            city = weather_data["name"]
            country = weather_data["sys"]["country"]
            temp = weather_data["main"]["temp"]
            feels_like = weather_data["main"]["feels_like"]
            humidity = weather_data["main"]["humidity"]
            weather_desc = weather_data["weather"][0]["description"]
            wind_speed = weather_data["wind"]["speed"]

            formatted_result = f"""
    Weather in {city}, {country}:
    - Temperature: {temp}°C (Feels like: {feels_like}°C)
    - Conditions: {weather_desc.capitalize()}
    - Humidity: {humidity}%
    - Wind Speed: {wind_speed} m/s
    """
            return formatted_result
        except KeyError:
            return "Error formatting weather data: incomplete or invalid data received"
utils/document_loader.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict, Any
3
+ import tempfile
4
+ from pathlib import Path
5
+
6
+ from langchain_community.document_loaders import PyPDFLoader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.schema import Document
9
+
10
+
11
+
12
class DocumentLoader:
    """Handles loading and processing PDF documents"""

    def __init__(self, document_dir: str = "documents"):
        self.document_dir = document_dir
        # Chunking configuration used for all loaded PDFs.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )

        # Create documents directory if it doesn't exist
        os.makedirs(document_dir, exist_ok=True)

    def load_pdf(self, file_path: str) -> List[Document]:
        """Load and split a PDF document into chunks"""
        try:
            pages = PyPDFLoader(file_path).load()
            return self.text_splitter.split_documents(pages)
        except Exception as e:
            print(f"Error loading PDF: {str(e)}")
            return []

    def save_uploaded_pdf(self, uploaded_file) -> str:
        """Save an uploaded PDF file with its original name and return its path"""
        try:
            # Make sure document_dir exists
            os.makedirs(self.document_dir, exist_ok=True)

            # Strip any directory components from the client-supplied name
            # to prevent path traversal or special characters.
            safe_name = os.path.basename(uploaded_file.name)
            destination = os.path.join(self.document_dir, safe_name)

            # Save file content
            with open(destination, 'wb') as out:
                out.write(uploaded_file.getvalue())

            return destination
        except Exception as e:
            print(f"Error saving uploaded PDF: {str(e)}")
            return ""

    def get_available_documents(self) -> List[str]:
        """Get a list of available PDF documents"""
        try:
            return [name for name in os.listdir(self.document_dir) if name.endswith('.pdf')]
        except Exception as e:
            print(f"Error listing documents: {str(e)}")
            return []
utils/evaluation.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any
2
+ from langsmith import Client
3
+ from langchain.smith import RunEvalConfig
4
+ from langsmith.evaluation import run_evaluator
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ import os
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+ LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
11
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
12
+
13
class LangSmithEvaluator:
    """Handles evaluation using LangSmith"""

    def __init__(self, api_key: str = LANGSMITH_API_KEY):
        self.client = Client(api_key=api_key)
        # LLM used as the judge for criteria-based evaluators.
        self.evaluator_llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", google_api_key=GEMINI_API_KEY)

    def evaluate_response(self, query: str, response: str, reference: str = None) -> Dict[str, Any]:
        """Evaluate an LLM response against a query and optional reference.

        Returns the evaluation results on success, or {"error": ...} if
        anything in the LangSmith pipeline fails.
        """
        # LLM-judged criteria are configured via RunEvalConfig.Criteria
        # entries with a shared judging model (eval_llm). The original
        # code called run_evaluator.RunEvalConfig(...), but run_evaluator
        # is a decorator function with no such attribute, so building the
        # config always raised AttributeError.
        eval_config = RunEvalConfig(
            evaluators=[
                "criteria",
                "embedding_distance",
                RunEvalConfig.Criteria("correctness"),
                RunEvalConfig.Criteria("helpfulness"),
                RunEvalConfig.Criteria("relevance"),
            ],
            eval_llm=self.evaluator_llm,
        )

        try:
            # Create dataset with single example
            dataset = self.client.create_dataset(
                "evaluation_dataset",
                description="Dataset for evaluation of LLM responses"
            )

            # Add example
            self.client.create_example(
                inputs={"question": query},
                outputs={"answer": response},
                dataset_id=dataset.id
            )

            # Run evaluation
            # NOTE(review): current langsmith.Client releases expose no
            # run_evaluation method — evaluation is normally driven via
            # langchain.smith.run_on_dataset. Confirm against the pinned
            # langsmith version before relying on this path.
            evaluation_results = self.client.run_evaluation(
                dataset_id=dataset.id,
                config=eval_config
            )

            return evaluation_results
        except Exception as e:
            print(f"Error during evaluation: {str(e)}")
            return {"error": str(e)}