Spaces:

KunalShaw
/

IMSKOS

Sleeping

KUNAL SHAW committed on Dec 9, 2025

Commit

d7ccaae

1 Parent(s): bc1532c

feat: Add HuggingFace Spaces support and LLM response generation

- Added YAML frontmatter for HuggingFace Spaces deployment
- Added LLM response generation using Groq (not just retrieval)
- Added LangGraph workflow with retrieve -> generate pipeline
- Added USER_AGENT configuration to suppress warnings
- Added Dockerfile and docker-compose.yml for containerized deployment
- Added .streamlit config for theming and settings
- Updated requirements.txt for HuggingFace Spaces compatibility
- Improved secret loading for both local and cloud deployments

Files changed (8) hide show

.gitignore +46 -2
.streamlit/config.toml +23 -0
.streamlit/secrets.toml.example +21 -0
Dockerfile +46 -0
README.md +14 -0
app.py +157 -35
docker-compose.yml +36 -0
requirements.txt +22 -22

.gitignore CHANGED Viewed

@@ -6,17 +6,61 @@ __pycache__/
 env/
 venv/
 .venv/
 # Streamlit / IDE
-.streamlit/
 .vscode/
 # Secrets and credentials
 .env
 *.json
 cs23b1039@iiitr.ac.in-token.json
 *.sqlite
 .DS_Store
 # Logs
-*.log

 env/
 venv/
 .venv/
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
 # Streamlit / IDE
+.streamlit/secrets.toml
 .vscode/
 # Secrets and credentials
 .env
 *.json
+!package.json
 cs23b1039@iiitr.ac.in-token.json
 *.sqlite
 .DS_Store
 # Logs
+*.log
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+# Docker
+.docker/
+# Jupyter Notebooks
+.ipynb_checkpoints/
+*.ipynb
+# Model files (large)
+*.h5
+*.pkl
+*.pt
+*.pth
+# OS generated
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db

.streamlit/config.toml ADDED Viewed

	@@ -0,0 +1,23 @@

+[theme]
+primaryColor = "#667eea"
+backgroundColor = "#ffffff"
+secondaryBackgroundColor = "#f0f2f6"
+textColor = "#262730"
+font = "sans serif"
+[server]
+headless = true
+port = 8501
+enableXsrfProtection = true
+maxUploadSize = 50
+[browser]
+gatherUsageStats = false
+serverAddress = "localhost"
+[runner]
+magicEnabled = true
+[client]
+showErrorDetails = true
+toolbarMode = "auto"

.streamlit/secrets.toml.example ADDED Viewed

	@@ -0,0 +1,21 @@

+# ==================== Streamlit Secrets Template ====================
+# IMSKOS - Intelligent Multi-Source Knowledge Orchestration System
+#
+# For Streamlit Cloud deployment:
+# 1. Go to your app settings on Streamlit Cloud
+# 2. Navigate to "Secrets" section
+# 3. Copy the contents below and fill in your actual values
+#
+# For local development:
+# 1. Create a file at .streamlit/secrets.toml
+# 2. Copy the contents below and fill in your actual values
+# ====================================================================
+# DataStax Astra DB Configuration
+# Get these from: https://astra.datastax.com
+ASTRA_DB_APPLICATION_TOKEN = "AstraCS:your_token_here"
+ASTRA_DB_ID = "your_database_id_here"
+# Groq API Configuration
+# Get your API key from: https://console.groq.com
+GROQ_API_KEY = "your_groq_api_key_here"

Dockerfile ADDED Viewed

	@@ -0,0 +1,46 @@

+# ==================== IMSKOS Dockerfile ====================
+# Intelligent Multi-Source Knowledge Orchestration System
+# Production-ready container configuration
+FROM python:3.10-slim
+# Set working directory
+WORKDIR /app
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    STREAMLIT_SERVER_PORT=8501 \
+    STREAMLIT_SERVER_ADDRESS=0.0.0.0
+# Install system dependencies
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first for better caching
+COPY requirements.txt .
+# Install Python dependencies
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Create non-root user for security
+RUN useradd -m -u 1000 appuser && \
+    chown -R appuser:appuser /app
+USER appuser
+# Expose Streamlit port
+EXPOSE 8501
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl --fail http://localhost:8501/_stcore/health || exit 1
+# Run Streamlit
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.headless=true"]

README.md CHANGED Viewed

@@ -1,9 +1,23 @@
 # 🧠 IMSKOS - Intelligent Multi-Source Knowledge Orchestration System
 [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
 [![LangChain](https://img.shields.io/badge/LangChain-🦜-green.svg)](https://langchain.com/)
 [![LangGraph](https://img.shields.io/badge/LangGraph-🔗-orange.svg)](https://github.com/langchain-ai/langgraph)
 [![Streamlit](https://img.shields.io/badge/Streamlit-🎈-red.svg)](https://streamlit.io/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 > **Enterprise-Grade Agentic RAG Framework with Adaptive Query Routing**

+---
+title: IMSKOS - Intelligent Knowledge Orchestration
+emoji: 🧠
+colorFrom: purple
+colorTo: blue
+sdk: streamlit
+sdk_version: 1.31.0
+app_file: app.py
+pinned: false
+license: mit
+short_description: Advanced Agentic RAG with LangGraph & Adaptive Query Routing
+---
 # 🧠 IMSKOS - Intelligent Multi-Source Knowledge Orchestration System
 [![Python 3.9+](https://img.shields.io/badge/python-3.9+-blue.svg)](https://www.python.org/downloads/)
 [![LangChain](https://img.shields.io/badge/LangChain-🦜-green.svg)](https://langchain.com/)
 [![LangGraph](https://img.shields.io/badge/LangGraph-🔗-orange.svg)](https://github.com/langchain-ai/langgraph)
 [![Streamlit](https://img.shields.io/badge/Streamlit-🎈-red.svg)](https://streamlit.io/)
+[![HuggingFace](https://img.shields.io/badge/🤗%20Hugging%20Face-Spaces-yellow.svg)](https://huggingface.co/spaces)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 > **Enterprise-Grade Agentic RAG Framework with Adaptive Query Routing**

app.py CHANGED Viewed

@@ -13,7 +13,15 @@ An enterprise-grade, production-ready intelligent query routing system that leve
 import streamlit as st
 import os
-from typing import List, Dict, Any
 # Compatibility shim for different typing.ForwardRef._evaluate signatures
 # ------------------------------------------------------------
@@ -64,6 +72,8 @@ from langchain_community.tools import WikipediaQueryRun
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_groq import ChatGroq
 from langchain_core.documents import Document
 from langgraph.graph import END, StateGraph, START
 from typing_extensions import TypedDict
 from pydantic import BaseModel, Field
@@ -71,6 +81,7 @@ from typing import Literal
 import time
 import json
 from datetime import datetime
 # Page Configuration
 st.set_page_config(
@@ -153,18 +164,44 @@ class Config:
     @staticmethod
     def load_env_variables():
-        """Load and validate environment variables"""
         required_vars = {
-            "ASTRA_DB_APPLICATION_TOKEN": os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
-            "ASTRA_DB_ID": os.getenv("ASTRA_DB_ID"),
-            "GROQ_API_KEY": os.getenv("GROQ_API_KEY")
         }
         missing_vars = [key for key, value in required_vars.items() if not value]
         if missing_vars:
             st.error(f"⚠️ Missing environment variables: {', '.join(missing_vars)}")
-            st.info("Please set these in your .env file or Streamlit secrets")
             st.stop()
         return required_vars
@@ -192,6 +229,7 @@ class GraphState(TypedDict):
     question: str
     generation: str
     documents: List[str]
 # ==================== Core System Classes ====================
@@ -264,6 +302,7 @@ class IntelligentRouter:
         self.groq_api_key = groq_api_key
         self.llm = None
         self.question_router = None
     def initialize(self):
         """Set up LLM and routing chain"""
@@ -294,18 +333,50 @@ Be precise in your routing decisions."""
         ])
         self.question_router = route_prompt | structured_llm
     def route(self, question: str) -> str:
         """Route question to appropriate data source"""
         result = self.question_router.invoke({"question": question})
         return result.datasource
 class AdaptiveRAGWorkflow:
     """LangGraph-based adaptive retrieval workflow"""
-    def __init__(self, vector_store, question_router):
         self.vector_store = vector_store
-        self.question_router = question_router
         self.retriever = vector_store.as_retriever(search_kwargs={"k": 4})
         self.wiki = self._setup_wikipedia()
         self.workflow = None
@@ -314,8 +385,8 @@ class AdaptiveRAGWorkflow:
     def _setup_wikipedia(self):
         """Initialize Wikipedia search tool"""
         api_wrapper = WikipediaAPIWrapper(
-            top_k_results=1,
-            doc_content_chars_max=500
         )
         return WikipediaQueryRun(api_wrapper=api_wrapper)
@@ -323,19 +394,37 @@ class AdaptiveRAGWorkflow:
         """Retrieve from vector store"""
         question = state["question"]
         documents = self.retriever.invoke(question)
-        return {"documents": documents, "question": question}
     def wiki_search(self, state: Dict) -> Dict:
         """Search Wikipedia"""
         question = state["question"]
-        docs = self.wiki.invoke({"query": question})
-        wiki_results = Document(page_content=docs)
-        return {"documents": wiki_results, "question": question}
     def route_question(self, state: Dict) -> str:
         """Route based on question type"""
         question = state["question"]
-        source = self.question_router.route(question)
         if source == "wiki_search":
             return "wiki_search"
@@ -349,8 +438,9 @@ class AdaptiveRAGWorkflow:
         # Add nodes
         workflow.add_node("wiki_search", self.wiki_search)
         workflow.add_node("retrieve", self.retrieve)
-        # Add conditional edges
         workflow.add_conditional_edges(
             START,
             self.route_question,
@@ -360,8 +450,12 @@ class AdaptiveRAGWorkflow:
             },
         )
-        workflow.add_edge("retrieve", END)
-        workflow.add_edge("wiki_search", END)
         self.app = workflow.compile()
@@ -372,15 +466,25 @@ class AdaptiveRAGWorkflow:
         result = {
             "route": None,
             "documents": [],
             "execution_time": 0
         }
         start_time = time.time()
-        for output in self.app.stream(inputs):
-            for key, value in output.items():
-                result["route"] = key
-                result["documents"] = value.get("documents", [])
         result["execution_time"] = time.time() - start_time
@@ -615,9 +719,9 @@ def render_query_tab():
                 # Routing information
                 route = result["route"]
-                route_class = "route-vector" if route == "retrieve" else "route-wiki"
-                route_emoji = "🗄️" if route == "retrieve" else "📖"
-                route_name = "Vector Store" if route == "retrieve" else "Wikipedia"
                 col1, col2, col3 = st.columns(3)
                 with col1:
@@ -629,22 +733,37 @@ def render_query_tab():
                 with col2:
                     st.metric("⚡ Execution Time", f"{result['execution_time']:.2f}s")
                 with col3:
-                    st.metric("📄 Documents", len(result['documents']) if isinstance(result['documents'], list) else 1)
-                # Display documents
-                st.markdown("### 📄 Retrieved Information")
                 documents = result['documents']
-                if isinstance(documents, list):
                     for i, doc in enumerate(documents[:5], 1):
-                        with st.expander(f"📌 Document {i}", expanded=(i == 1)):
-                            st.markdown(doc.page_content)
-                            if advanced_mode and hasattr(doc, 'metadata'):
                                 st.markdown("**Metadata:**")
                                 st.json(doc.metadata)
-                else:
-                    st.markdown(documents.page_content)
                 # Store query history
                 if 'query_history' not in st.session_state:
@@ -654,11 +773,14 @@ def render_query_tab():
                     "query": query,
                     "route": route_name,
                     "timestamp": datetime.now().strftime("%H:%M:%S"),
-                    "execution_time": result['execution_time']
                 })
             except Exception as e:
                 st.error(f"❌ Query execution failed: {str(e)}")
 def render_analytics_tab():
     """Render system analytics and monitoring"""

 import streamlit as st
 import os
+from typing import List, Dict, Any, Optional
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
+# Set USER_AGENT to suppress warnings from web loaders
+if not os.getenv("USER_AGENT"):
+    os.environ["USER_AGENT"] = "IMSKOS/1.0 (Intelligent Multi-Source Knowledge Orchestration System)"
 # Compatibility shim for different typing.ForwardRef._evaluate signatures
 # ------------------------------------------------------------
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_groq import ChatGroq
 from langchain_core.documents import Document
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
 from langgraph.graph import END, StateGraph, START
 from typing_extensions import TypedDict
 from pydantic import BaseModel, Field
 import time
 import json
 from datetime import datetime
+import traceback
 # Page Configuration
 st.set_page_config(
     @staticmethod
     def load_env_variables():
+        """Load and validate environment variables from multiple sources
+        Priority order:
+        1. Streamlit secrets (for Streamlit Cloud / HuggingFace Spaces)
+        2. Environment variables (for local development / Docker)
+        """
+        def get_secret(key: str) -> Optional[str]:
+            """Get secret from Streamlit secrets or environment variables"""
+            # First check Streamlit secrets (works on HuggingFace Spaces)
+            try:
+                if hasattr(st, 'secrets') and key in st.secrets:
+                    return st.secrets[key]
+            except Exception:
+                pass
+            # Fall back to environment variables
+            return os.getenv(key)
         required_vars = {
+            "ASTRA_DB_APPLICATION_TOKEN": get_secret("ASTRA_DB_APPLICATION_TOKEN"),
+            "ASTRA_DB_ID": get_secret("ASTRA_DB_ID"),
+            "GROQ_API_KEY": get_secret("GROQ_API_KEY")
         }
         missing_vars = [key for key, value in required_vars.items() if not value]
         if missing_vars:
             st.error(f"⚠️ Missing environment variables: {', '.join(missing_vars)}")
+            st.info("""
+            **Setup Instructions:**
+            1. **Local Development:** Create a `.env` file with your credentials
+            2. **Streamlit Cloud:** Add secrets in the app settings
+            Required variables:
+            - `ASTRA_DB_APPLICATION_TOKEN` - Get from [DataStax Astra](https://astra.datastax.com)
+            - `ASTRA_DB_ID` - Your Astra DB database ID
+            - `GROQ_API_KEY` - Get from [Groq Console](https://console.groq.com)
+            """)
             st.stop()
         return required_vars
     question: str
     generation: str
     documents: List[str]
+    route: str
 # ==================== Core System Classes ====================
         self.groq_api_key = groq_api_key
         self.llm = None
         self.question_router = None
+        self.generation_chain = None
     def initialize(self):
         """Set up LLM and routing chain"""
         ])
         self.question_router = route_prompt | structured_llm
+        # Set up generation chain for synthesizing answers
+        generation_prompt = ChatPromptTemplate.from_messages([
+            ("system", """You are a helpful AI assistant specialized in providing accurate, informative answers.
+Use the following retrieved context to answer the user's question.
+If the context doesn't contain relevant information, say so and provide general guidance.
+Be concise but comprehensive. Use bullet points for clarity when appropriate.
+Context:
+{context}"""),
+            ("human", "{question}")
+        ])
+        self.generation_chain = generation_prompt | self.llm | StrOutputParser()
     def route(self, question: str) -> str:
         """Route question to appropriate data source"""
         result = self.question_router.invoke({"question": question})
         return result.datasource
+    def generate_response(self, question: str, documents: List[Document]) -> str:
+        """Generate a coherent response from retrieved documents"""
+        # Format documents into context string
+        if isinstance(documents, list):
+            context = "\n\n".join([
+                f"Document {i+1}:\n{doc.page_content}"
+                for i, doc in enumerate(documents[:5])
+            ])
+        else:
+            context = documents.page_content if hasattr(documents, 'page_content') else str(documents)
+        response = self.generation_chain.invoke({
+            "context": context,
+            "question": question
+        })
+        return response
 class AdaptiveRAGWorkflow:
     """LangGraph-based adaptive retrieval workflow"""
+    def __init__(self, vector_store, router: IntelligentRouter):
         self.vector_store = vector_store
+        self.router = router
         self.retriever = vector_store.as_retriever(search_kwargs={"k": 4})
         self.wiki = self._setup_wikipedia()
         self.workflow = None
     def _setup_wikipedia(self):
         """Initialize Wikipedia search tool"""
         api_wrapper = WikipediaAPIWrapper(
+            top_k_results=2,
+            doc_content_chars_max=1000
         )
         return WikipediaQueryRun(api_wrapper=api_wrapper)
         """Retrieve from vector store"""
         question = state["question"]
         documents = self.retriever.invoke(question)
+        return {"documents": documents, "question": question, "route": "vectorstore"}
     def wiki_search(self, state: Dict) -> Dict:
         """Search Wikipedia"""
         question = state["question"]
+        try:
+            docs = self.wiki.invoke({"query": question})
+            wiki_results = Document(page_content=docs)
+        except Exception as e:
+            wiki_results = Document(page_content=f"Wikipedia search returned no results for this query. Error: {str(e)}")
+        return {"documents": [wiki_results], "question": question, "route": "wikipedia"}
+    def generate(self, state: Dict) -> Dict:
+        """Generate response from retrieved documents"""
+        question = state["question"]
+        documents = state["documents"]
+        # Use the router's generation chain to create a response
+        generation = self.router.generate_response(question, documents)
+        return {
+            "question": question,
+            "documents": documents,
+            "generation": generation,
+            "route": state.get("route", "unknown")
+        }
     def route_question(self, state: Dict) -> str:
         """Route based on question type"""
         question = state["question"]
+        source = self.router.route(question)
         if source == "wiki_search":
             return "wiki_search"
         # Add nodes
         workflow.add_node("wiki_search", self.wiki_search)
         workflow.add_node("retrieve", self.retrieve)
+        workflow.add_node("generate", self.generate)
+        # Add conditional edges from START
         workflow.add_conditional_edges(
             START,
             self.route_question,
             },
         )
+        # Both retrieval paths lead to generation
+        workflow.add_edge("retrieve", "generate")
+        workflow.add_edge("wiki_search", "generate")
+        # Generation leads to END
+        workflow.add_edge("generate", END)
         self.app = workflow.compile()
         result = {
             "route": None,
             "documents": [],
+            "generation": "",
             "execution_time": 0
         }
         start_time = time.time()
+        try:
+            for output in self.app.stream(inputs):
+                for key, value in output.items():
+                    if key == "generate":
+                        result["generation"] = value.get("generation", "")
+                        result["route"] = value.get("route", "unknown")
+                        result["documents"] = value.get("documents", [])
+                    elif key in ["retrieve", "wiki_search"]:
+                        result["route"] = value.get("route", key)
+                        result["documents"] = value.get("documents", [])
+        except Exception as e:
+            result["generation"] = f"Error executing query: {str(e)}"
+            result["route"] = "error"
         result["execution_time"] = time.time() - start_time
                 # Routing information
                 route = result["route"]
+                route_class = "route-vector" if route == "vectorstore" else "route-wiki"
+                route_emoji = "🗄️" if route == "vectorstore" else "📖"
+                route_name = "Vector Store" if route == "vectorstore" else "Wikipedia"
                 col1, col2, col3 = st.columns(3)
                 with col1:
                 with col2:
                     st.metric("⚡ Execution Time", f"{result['execution_time']:.2f}s")
                 with col3:
+                    num_docs = len(result['documents']) if isinstance(result['documents'], list) else 1
+                    st.metric("📄 Documents", num_docs)
+                # Display AI-generated response
+                st.markdown("### 🤖 AI-Generated Answer")
+                st.markdown(f"""
+                <div style="background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
+                            padding: 1.5rem; border-radius: 10px; margin: 1rem 0;
+                            border-left: 4px solid #667eea;">
+                    {result['generation']}
+                </div>
+                """, unsafe_allow_html=True)
+                # Display source documents in expandable section
+                st.markdown("### 📄 Source Documents")
                 documents = result['documents']
+                if isinstance(documents, list) and documents:
                     for i, doc in enumerate(documents[:5], 1):
+                        with st.expander(f"📌 Source Document {i}", expanded=False):
+                            if hasattr(doc, 'page_content'):
+                                st.markdown(doc.page_content)
+                            else:
+                                st.markdown(str(doc))
+                            if advanced_mode and hasattr(doc, 'metadata') and doc.metadata:
                                 st.markdown("**Metadata:**")
                                 st.json(doc.metadata)
+                elif hasattr(documents, 'page_content'):
+                    with st.expander("📌 Source Document", expanded=False):
+                        st.markdown(documents.page_content)
                 # Store query history
                 if 'query_history' not in st.session_state:
                     "query": query,
                     "route": route_name,
                     "timestamp": datetime.now().strftime("%H:%M:%S"),
+                    "execution_time": result['execution_time'],
+                    "response_preview": result['generation'][:100] + "..." if len(result['generation']) > 100 else result['generation']
                 })
             except Exception as e:
                 st.error(f"❌ Query execution failed: {str(e)}")
+                if st.checkbox("Show error details"):
+                    st.code(traceback.format_exc())
 def render_analytics_tab():
     """Render system analytics and monitoring"""

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,36 @@

+# ==================== Docker Compose ====================
+# IMSKOS - Intelligent Multi-Source Knowledge Orchestration System
+# Docker Compose configuration for local development and deployment
+version: '3.8'
+services:
+  imskos:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: imskos-app
+    ports:
+      - "8501:8501"
+    environment:
+      - ASTRA_DB_APPLICATION_TOKEN=${ASTRA_DB_APPLICATION_TOKEN}
+      - ASTRA_DB_ID=${ASTRA_DB_ID}
+      - GROQ_API_KEY=${GROQ_API_KEY}
+    env_file:
+      - .env
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8501/_stcore/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+    volumes:
+      # Mount for development (optional, remove for production)
+      - ./app.py:/app/app.py:ro
+    networks:
+      - imskos-network
+networks:
+  imskos-network:
+    driver: bridge

requirements.txt CHANGED Viewed

@@ -1,34 +1,34 @@
 # ==================== Core Framework ====================
-streamlit==1.31.0
-python-dotenv==1.0.0
 # ==================== LangChain Ecosystem ====================
-langchain==0.1.16
-langchain-community==0.0.38
-langchain-core==0.1.46
-langchain-groq==0.1.3
-langchain-huggingface==0.0.1
-langgraph==0.0.43
-langchainhub==0.1.15
 # ==================== Vector Database & Embeddings ====================
-cassio==0.1.4
-sentence-transformers==2.5.1
 # ==================== Document Processing ====================
-tiktoken==0.6.0
-beautifulsoup4==4.12.3
-lxml==5.1.0
 # ==================== External APIs & Tools ====================
-wikipedia==1.4.0
-arxiv==2.1.0
 # ==================== Data & Utilities ====================
-pandas==2.2.1
-pydantic==2.6.4
-typing-extensions==4.10.0
-# ==================== Optional: Performance & Monitoring ====================
-# psutil==5.9.8
-# prometheus-client==0.20.0

 # ==================== Core Framework ====================
+streamlit>=1.31.0,<2.0.0
+python-dotenv>=1.0.0
 # ==================== LangChain Ecosystem ====================
+langchain>=0.1.16
+langchain-community>=0.0.38
+langchain-core>=0.1.46
+langchain-groq>=0.1.3
+langchain-huggingface>=0.0.1
+langchain-text-splitters>=0.0.1
+langgraph>=0.0.43
+langchainhub>=0.1.15
 # ==================== Vector Database & Embeddings ====================
+cassio>=0.1.4
+sentence-transformers>=2.5.1
 # ==================== Document Processing ====================
+tiktoken>=0.6.0
+beautifulsoup4>=4.12.3
+lxml>=5.1.0
 # ==================== External APIs & Tools ====================
+wikipedia>=1.4.0
 # ==================== Data & Utilities ====================
+pandas>=2.2.1
+pydantic>=2.6.4
+typing-extensions>=4.10.0
+# ==================== HTTP & Networking ====================
+requests>=2.31.0
+aiohttp>=3.9.0