deenaik committed on
Commit 6ef4823 · 0 Parent(s)

Initial commit
.env ADDED
@@ -0,0 +1,30 @@
+ # Groq API Configuration
+ GROQ_API_KEY=your_groq_api_key_here
+
+ # Ollama Configuration
+ OLLAMA_HOST=http://localhost:11434
+
+ # Model Configuration
+ LOCAL_MODEL_SMALL=llama3.2:3b
+ LOCAL_MODEL_LARGE=llama3.1:8b
+ GROQ_MODEL=llama-3.3-70b-versatile
+
+ # Embedding Model
+ EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+
+ # Processing Parameters
+ CHUNK_SIZE=1000
+ CHUNK_OVERLAP=200
+ TOP_K_RETRIEVAL=5
+
+ # Model Selection Thresholds
+ COMPLEXITY_THRESHOLD=0.7
+ MAX_LOCAL_CONTEXT_SIZE=4000
+
+ # ChromaDB Settings
+ CHROMA_PERSIST_DIR=./chroma_db
+ COLLECTION_NAME=hpmor_collection
+
+ # Gradio Settings
+ GRADIO_SERVER_PORT=7860
+ GRADIO_SHARE=False
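
Note: the GROQ_API_KEY above is shown as a placeholder; a real key should never be committed. A minimal sketch (assuming python-dotenv, which src/config.py below relies on) of how these entries are read at startup:

    import os
    from dotenv import load_dotenv

    load_dotenv()                                       # pulls .env into the process environment
    groq_key = os.getenv("GROQ_API_KEY")                # None if absent
    chunk_size = int(os.getenv("CHUNK_SIZE", "1000"))   # numeric values arrive as strings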
.gitignore ADDED
@@ -0,0 +1,15 @@
+ # Python-generated files
+ __pycache__/
+ *.py[oc]
+ build/
+ dist/
+ wheels/
+ *.egg-info
+
+ # Virtual environments
+ .venv
+
+ # Database
+ chroma_db/
+ blobs/
+ models/
.python-version ADDED
@@ -0,0 +1 @@
+ 3.11
README.md ADDED
Binary file (4.05 kB).
 
data/processed/chapters.json ADDED
The diff for this file is too large to render.
 
data/processed/documents.json ADDED
The diff for this file is too large to render.
 
data/raw/hpmor.html ADDED
The diff for this file is too large to render.
 
main.py ADDED
@@ -0,0 +1,171 @@
+ #!/usr/bin/env python3
+ """Main entry point for HPMOR Q&A System."""
+
+ import sys
+ import argparse
+ from pathlib import Path
+
+ # Add src to path
+ sys.path.insert(0, str(Path(__file__).parent))
+
+ from src.config import config
+ from src.document_processor import HPMORProcessor
+ from src.vector_store import VectorStoreManager
+ from src.rag_engine import RAGEngine
+ from src.chat_interface import ChatInterface
+
+
+ def setup_system(force_recreate: bool = False):
+     """Set up the HPMOR Q&A system."""
+     print("=" * 80)
+     print("HPMOR Q&A System Setup")
+     print("=" * 80)
+
+     # Process documents
+     print("\n1. Processing HPMOR document...")
+     processor = HPMORProcessor()
+     documents = processor.process(force_reprocess=force_recreate)
+     print(f"   ✓ Processed {len(documents)} chunks")
+
+     # Create vector index
+     print("\n2. Creating vector index...")
+     vector_store = VectorStoreManager()
+     index = vector_store.get_or_create_index(documents, force_recreate=force_recreate)
+     stats = vector_store.get_stats()
+     print(f"   ✓ Index created with {stats['num_vectors']} vectors")
+
+     print("\n✅ Setup complete! The system is ready to use.")
+     return True
+
+
+ def test_system():
+     """Test the system with sample queries."""
+     print("=" * 80)
+     print("HPMOR Q&A System Test")
+     print("=" * 80)
+
+     engine = RAGEngine(force_recreate=False)
+
+     test_questions = [
+         "What is Harry Potter's full name in HPMOR?",
+         "How does Harry first react to learning about magic?",
+     ]
+
+     for question in test_questions:
+         print(f"\nQ: {question}")
+         response = engine.query(question, top_k=3)
+
+         if isinstance(response["answer"], str):
+             answer = response["answer"]
+         else:
+             answer = str(response["answer"])
+
+         print(f"A: {answer[:500]}...")
+         print(f"   (Model: {response['model_used']})")
+
+
+ def check_ollama():
+     """Check if Ollama is installed and running."""
+     import httpx
+
+     print("\nChecking Ollama status...")
+     try:
+         response = httpx.get(f"{config.ollama_host}/api/tags", timeout=2.0)
+         if response.status_code == 200:
+             print("✓ Ollama is running")
+             data = response.json()
+             if data.get("models"):
+                 print(f"  Available models: {', '.join([m['name'] for m in data['models']])}")
+             else:
+                 print("  ⚠ No models installed. Run: ollama pull llama3.2:3b")
+             return True
+         else:
+             print("✗ Ollama is not responding correctly")
+             return False
+     except Exception as e:
+         print("✗ Ollama is not running. Please start it with: ollama serve")
+         print(f"  Error: {e}")
+         return False
+
+
+ def main():
+     """Main entry point."""
+     parser = argparse.ArgumentParser(description="HPMOR Q&A System")
+     parser.add_argument(
+         "command",
+         choices=["setup", "chat", "test", "check"],
+         help="Command to run"
+     )
+     parser.add_argument(
+         "--force",
+         action="store_true",
+         help="Force recreate index and reprocess documents"
+     )
+
+     args = parser.parse_args()
+
+     if args.command == "setup":
+         setup_system(force_recreate=args.force)
+
+     elif args.command == "check":
+         print("System Check")
+         print("-" * 40)
+
+         # Check Ollama
+         ollama_ok = check_ollama()
+
+         # Check Groq
+         print("\nChecking Groq API...")
+         if config.has_groq_api():
+             print("✓ Groq API key configured")
+         else:
+             print("✗ Groq API key not configured")
+             print("  Add your key to the .env file")
+
+         # Check data
+         print("\nChecking data files...")
+         if config.hpmor_file.exists():
+             print(f"✓ HPMOR file found: {config.hpmor_file}")
+         else:
+             print(f"✗ HPMOR file not found: {config.hpmor_file}")
+
+         # Check processed data
+         processed_docs = config.processed_data_dir / "documents.json"
+         if processed_docs.exists():
+             print("✓ Processed documents found")
+         else:
+             print("✗ No processed documents. Run: python main.py setup")
+
+     elif args.command == "test":
+         test_system()
+
+     elif args.command == "chat":
+         print("=" * 80)
+         print("HPMOR Q&A Chat Interface")
+         print("=" * 80)
+
+         # Check system
+         check_ollama()
+
+         if not config.has_groq_api():
+             print("\n⚠ Warning: Groq API key not configured.")
+             print("  The system will only use local models (if Ollama is running).")
+             print("  For best performance, add your Groq API key to the .env file.")
+
+         # Check if setup is needed
+         processed_docs = config.processed_data_dir / "documents.json"
+         if not processed_docs.exists():
+             print("\n⚠ No processed documents found. Running setup...")
+             setup_system()
+
+         # Launch chat interface
+         print("\nStarting chat interface...")
+         chat = ChatInterface()
+         chat.launch()
+
+
+ if __name__ == "__main__":
+     if len(sys.argv) == 1:
+         # No arguments provided, default to chat
+         sys.argv.append("chat")
+     main()
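
The argparse commands can also be driven programmatically. A short sketch (assuming imports resolve from the repository root):

    # Equivalent to `python main.py check` / `setup` / `test` without argparse
    from main import check_ollama, setup_system, test_system

    check_ollama()   # verify the local Ollama server and its installed models
    setup_system()   # parse, chunk, and index the book
    test_system()    # run the two bundled sample queries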
pyproject.toml ADDED
@@ -0,0 +1,21 @@
+ [project]
+ name = "hpmor-qa"
+ version = "0.1.0"
+ description = "RAG-based Q&A system for Harry Potter and the Methods of Rationality"
+ readme = "README.md"
+ requires-python = ">=3.11"
+ dependencies = [
+     "beautifulsoup4>=4.12.0",
+     "chromadb>=1.1.1",
+     "gradio>=5.49.1",
+     "httpx>=0.28.1",
+     "langchain>=0.3.27",
+     "langchain-groq>=0.3.8",
+     "litellm>=1.78.0",
+     "llama-index>=0.14.4",
+     "llama-index-embeddings-huggingface>=0.6.1",
+     "llama-index-llms-groq>=0.4.1",
+     "llama-index-llms-ollama>=0.8.0",
+     "llama-index-vector-stores-chroma>=0.5.3",
+     "lxml>=6.0.2",
+ ]
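
beautifulsoup4 is listed explicitly because src/document_processor.py imports it directly; relying on it arriving transitively through llama-index would be fragile. A quick sanity-check sketch for the resolved environment:

    import importlib.metadata as md

    for pkg in ("beautifulsoup4", "chromadb", "gradio", "llama-index", "litellm"):
        print(pkg, md.version(pkg))  # raises PackageNotFoundError if a package is missing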
src/__init__.py ADDED
@@ -0,0 +1 @@
+ # HPMOR Q&A System
src/chat_interface.py ADDED
@@ -0,0 +1,244 @@
+ """Gradio chat interface for HPMOR Q&A system."""
+
+ import gradio as gr
+ from typing import List, Tuple
+
+ from src.rag_engine import RAGEngine
+ from src.model_chain import ModelType
+ from src.config import config
+
+
+ class ChatInterface:
+     """Gradio-based chat interface for HPMOR Q&A."""
+
+     def __init__(self):
+         """Initialize the chat interface."""
+         print("Initializing HPMOR Q&A Chat Interface...")
+         self.engine = RAGEngine(force_recreate=False)
+         self.conversation_history = []
+
+     def format_sources(self, sources: List[dict]) -> str:
+         """Format sources for display."""
+         if not sources:
+             return "No sources found"
+
+         formatted = []
+         for i, source in enumerate(sources, 1):
+             formatted.append(
+                 f"**Source {i}** - Chapter {source['chapter_number']}: {source['chapter_title']}\n"
+                 f"Relevance Score: {source['score']:.2f}\n"
+                 f"Preview: *{source['text_preview'][:150]}...*"
+             )
+         return "\n\n".join(formatted)
+
+     def process_message(
+         self,
+         message: str,
+         history: List[List[str]],
+         model_choice: str,
+         top_k: int,
+         show_sources: bool
+     ) -> Tuple[str, str, str]:
+         """Process a chat message and return response."""
+         if not message:
+             return "", "", "Please enter a question."
+
+         # Convert model choice to enum
+         model_map = {
+             "Auto (Intelligent Routing)": None,
+             "Local Small (Fast)": ModelType.LOCAL_SMALL,
+             "Local Large (Better)": ModelType.LOCAL_LARGE,
+             "Groq API (Best)": ModelType.GROQ_API
+         }
+         force_model = model_map.get(model_choice)
+
+         # Convert history to messages format
+         messages = []
+         for user_msg, assistant_msg in history:
+             if user_msg:
+                 messages.append({"role": "user", "content": user_msg})
+             if assistant_msg:
+                 messages.append({"role": "assistant", "content": assistant_msg})
+         messages.append({"role": "user", "content": message})
+
+         try:
+             # Get response from engine, honoring the UI's model and top-k settings
+             response = self.engine.chat(
+                 messages, stream=False, force_model=force_model, top_k=top_k
+             )
+
+             # Extract answer
+             if isinstance(response.get("answer"), str):
+                 answer = response["answer"]
+             else:
+                 # Handle LlamaIndex response object
+                 answer = str(response.get("answer", "No response generated"))
+
+             # Format model info
+             model_info = f"**Model Used:** {response.get('model_used', 'Unknown')}"
+             if response.get("fallback_used"):
+                 model_info += " (via fallback)"
+             model_info += f"\n**Context Size:** {response.get('context_size', 0)} characters"
+
+             # Format sources if requested
+             sources_text = ""
+             if show_sources and response.get("sources"):
+                 sources_text = self.format_sources(response["sources"])
+
+             return answer, sources_text, model_info
+
+         except Exception as e:
+             error_msg = f"Error: {str(e)}"
+             return error_msg, "", "Error occurred"
+
+     def clear_conversation(self):
+         """Clear conversation history and cache."""
+         self.conversation_history = []
+         self.engine.clear_cache()
+         return None, "", "", "Conversation cleared"
+
+     def create_interface(self) -> gr.Blocks:
+         """Create the Gradio interface."""
+         # Custom CSS must be passed to gr.Blocks at construction time;
+         # assigning interface.css after the fact has no effect.
+         custom_css = """
+         .model-info {
+             background-color: #f0f0f0;
+             padding: 10px;
+             border-radius: 5px;
+             font-size: 0.9em;
+         }
+         """
+
+         with gr.Blocks(title="HPMOR Q&A System", theme=gr.themes.Soft(), css=custom_css) as interface:
+             gr.Markdown(
+                 """
+                 # 📚 Harry Potter and the Methods of Rationality - Q&A System
+
+                 Ask questions about HPMOR and get intelligent answers powered by RAG (Retrieval-Augmented Generation).
+                 The system uses local models when possible and falls back to the Groq API for complex queries.
+                 """
+             )
+
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     chatbot = gr.Chatbot(
+                         label="Chat",
+                         height=500,
+                         show_copy_button=True
+                     )
+
+                     with gr.Row():
+                         msg_input = gr.Textbox(
+                             label="Your Question",
+                             placeholder="Ask anything about HPMOR...",
+                             lines=2,
+                             scale=4
+                         )
+                         submit_btn = gr.Button("Send", variant="primary", scale=1)
+
+                 with gr.Column(scale=1):
+                     gr.Markdown("### Settings")
+
+                     model_choice = gr.Radio(
+                         choices=[
+                             "Auto (Intelligent Routing)",
+                             "Local Small (Fast)",
+                             "Local Large (Better)",
+                             "Groq API (Best)"
+                         ],
+                         value="Auto (Intelligent Routing)",
+                         label="Model Selection"
+                     )
+
+                     top_k = gr.Slider(
+                         minimum=1,
+                         maximum=10,
+                         value=5,
+                         step=1,
+                         label="Number of Context Chunks"
+                     )
+
+                     show_sources = gr.Checkbox(
+                         value=True,
+                         label="Show Sources"
+                     )
+
+                     clear_btn = gr.Button("Clear Conversation", variant="secondary")
+
+                     gr.Markdown("### Model Info")
+                     model_info = gr.Markdown(
+                         value="Ready to answer questions",
+                         elem_classes=["model-info"]
+                     )
+
+             with gr.Row():
+                 sources_display = gr.Markdown(
+                     label="Retrieved Sources",
+                     value="",
+                     visible=True
+                 )
+
+             # Example questions
+             gr.Examples(
+                 examples=[
+                     "What is Harry's initial reaction to learning about magic?",
+                     "How does Harry apply the scientific method to understand magic?",
+                     "What are the key differences between Harry and Hermione's approaches to learning?",
+                     "Explain the concept of 'rationality' as presented in the story",
+                     "What magical experiments does Harry conduct?",
+                 ],
+                 inputs=msg_input,
+                 label="Example Questions"
+             )
+
+             # Event handlers
+             def respond(message, history, model, topk, sources):
+                 """Handle message submission."""
+                 answer, sources_text, info = self.process_message(
+                     message, history, model, topk, sources
+                 )
+                 history.append([message, answer])
+                 return "", history, sources_text, info
+
+             msg_input.submit(
+                 respond,
+                 inputs=[msg_input, chatbot, model_choice, top_k, show_sources],
+                 outputs=[msg_input, chatbot, sources_display, model_info]
+             )
+
+             submit_btn.click(
+                 respond,
+                 inputs=[msg_input, chatbot, model_choice, top_k, show_sources],
+                 outputs=[msg_input, chatbot, sources_display, model_info]
+             )
+
+             clear_btn.click(
+                 self.clear_conversation,
+                 outputs=[chatbot, sources_display, msg_input, model_info]
+             )
+
+         return interface
+
+     def launch(self):
+         """Launch the Gradio interface."""
+         interface = self.create_interface()
+
+         print("\nLaunching HPMOR Q&A Chat Interface...")
+         print(f"Server will be available at: http://localhost:{config.gradio_server_port}")
+
+         interface.launch(
+             server_name="0.0.0.0",
+             server_port=config.gradio_server_port,
+             share=config.gradio_share,
+             favicon_path=None
+         )
+
+
+ def main():
+     """Launch the chat interface."""
+     chat = ChatInterface()
+     chat.launch()
+
+
+ if __name__ == "__main__":
+     main()
src/config.py ADDED
@@ -0,0 +1,104 @@
+ """Configuration management for HPMOR Q&A System."""
+
+ import os
+ from pathlib import Path
+ from typing import Optional
+ from pydantic import BaseModel, Field
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ class Config(BaseModel):
+     """Application configuration."""
+
+     # API Keys
+     groq_api_key: Optional[str] = Field(default=os.getenv("GROQ_API_KEY"))
+
+     # Ollama Settings
+     ollama_host: str = Field(default=os.getenv("OLLAMA_HOST", "http://localhost:11434"))
+
+     # Model Names (defaults match the tags Ollama actually publishes)
+     local_model_small: str = Field(default=os.getenv("LOCAL_MODEL_SMALL", "llama3.2:3b"))
+     local_model_large: str = Field(default=os.getenv("LOCAL_MODEL_LARGE", "llama3.1:8b"))
+     groq_model: str = Field(default=os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile"))
+
+     # Embedding Model
+     embedding_model: str = Field(
+         default=os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+     )
+
+     # Processing Parameters
+     chunk_size: int = Field(default=int(os.getenv("CHUNK_SIZE", "1000")))
+     chunk_overlap: int = Field(default=int(os.getenv("CHUNK_OVERLAP", "200")))
+     top_k_retrieval: int = Field(default=int(os.getenv("TOP_K_RETRIEVAL", "5")))
+
+     # Model Selection Thresholds
+     complexity_threshold: float = Field(
+         default=float(os.getenv("COMPLEXITY_THRESHOLD", "0.7"))
+     )
+     max_local_context_size: int = Field(
+         default=int(os.getenv("MAX_LOCAL_CONTEXT_SIZE", "4000"))
+     )
+
+     # ChromaDB Settings
+     chroma_persist_dir: Path = Field(
+         default=Path(os.getenv("CHROMA_PERSIST_DIR", "./chroma_db"))
+     )
+     collection_name: str = Field(
+         default=os.getenv("COLLECTION_NAME", "hpmor_collection")
+     )
+
+     # Gradio Settings
+     gradio_server_port: int = Field(
+         default=int(os.getenv("GRADIO_SERVER_PORT", "7860"))
+     )
+     gradio_share: bool = Field(
+         default=os.getenv("GRADIO_SHARE", "False").lower() == "true"
+     )
+
+     # File Paths
+     data_dir: Path = Field(default=Path("data"))
+     raw_data_dir: Path = Field(default=Path("data/raw"))
+     processed_data_dir: Path = Field(default=Path("data/processed"))
+     hpmor_file: Path = Field(default=Path("data/raw/hpmor.html"))
+
+     def validate_paths(self) -> None:
+         """Create necessary directories if they don't exist."""
+         for dir_path in [self.data_dir, self.raw_data_dir, self.processed_data_dir]:
+             dir_path.mkdir(parents=True, exist_ok=True)
+
+         self.chroma_persist_dir.mkdir(parents=True, exist_ok=True)
+
+     def has_groq_api(self) -> bool:
+         """Check if a Groq API key is configured."""
+         return bool(self.groq_api_key and self.groq_api_key != "your_groq_api_key_here")
+
+     def get_model_config(self, model_type: str) -> dict:
+         """Get configuration for a specific model type."""
+         configs = {
+             "local_small": {
+                 "model": self.local_model_small,
+                 "type": "ollama",
+                 "max_tokens": 2048,
+                 "temperature": 0.7,
+             },
+             "local_large": {
+                 "model": self.local_model_large,
+                 "type": "ollama",
+                 "max_tokens": 4096,
+                 "temperature": 0.7,
+             },
+             "groq": {
+                 "model": self.groq_model,
+                 "type": "groq",
+                 "api_key": self.groq_api_key,
+                 "max_tokens": 8192,
+                 "temperature": 0.7,
+             },
+         }
+         return configs.get(model_type, configs["local_small"])
+
+ # Create global config instance
+ config = Config()
+ config.validate_paths()
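
Because the Field defaults are evaluated from os.getenv when the module is first imported, any override must be in place before src.config is imported. A small sketch of that ordering constraint:

    import os
    os.environ["CHUNK_SIZE"] = "500"   # must happen before the import below

    from src.config import Config
    print(Config().chunk_size)         # -> 500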
src/document_processor.py ADDED
@@ -0,0 +1,218 @@
+ """Document processor for parsing and chunking HPMOR HTML."""
+
+ import re
+ import json
+ from pathlib import Path
+ from typing import List, Dict, Optional
+ from bs4 import BeautifulSoup
+ from llama_index.core import Document
+ from llama_index.core.node_parser import SentenceSplitter
+ from src.config import config
+
+
+ class HPMORProcessor:
+     """Process HPMOR HTML document into chunks for RAG."""
+
+     def __init__(self):
+         self.chunk_size = config.chunk_size
+         self.chunk_overlap = config.chunk_overlap
+         self.processed_dir = config.processed_data_dir
+
+     def parse_html(self, file_path: Path) -> List[Dict]:
+         """Parse HTML file and extract chapters with metadata."""
+         print(f"Parsing HTML file: {file_path}")
+
+         with open(file_path, 'r', encoding='utf-8') as f:
+             html_content = f.read()
+
+         soup = BeautifulSoup(html_content, 'lxml')
+
+         # Remove style and script tags
+         for tag in soup(['style', 'script']):
+             tag.decompose()
+
+         # Try to identify chapters by common patterns
+         chapters = []
+         chapter_pattern = re.compile(r'Chapter\s+(\d+)', re.IGNORECASE)
+
+         # Find all h1, h2, h3 tags that might be chapter headers
+         headers = soup.find_all(['h1', 'h2', 'h3'])
+
+         current_chapter = None
+         current_content = []
+         chapter_num = 0
+
+         for header in headers:
+             header_text = header.get_text(strip=True)
+             match = chapter_pattern.search(header_text)
+
+             if match:
+                 # Save previous chapter if exists
+                 if current_chapter and current_content:
+                     chapters.append({
+                         'chapter_number': current_chapter['number'],
+                         'chapter_title': current_chapter['title'],
+                         'content': '\n'.join(current_content)
+                     })
+
+                 # Start new chapter
+                 chapter_num = int(match.group(1))
+                 current_chapter = {
+                     'number': chapter_num,
+                     'title': header_text
+                 }
+                 current_content = []
+
+                 # Get content after this header until next chapter
+                 for sibling in header.find_next_siblings():
+                     if sibling.name in ['h1', 'h2', 'h3']:
+                         if chapter_pattern.search(sibling.get_text()):
+                             break
+                     text = sibling.get_text(strip=True)
+                     if text:
+                         current_content.append(text)
+
+         # Add the last chapter
+         if current_chapter and current_content:
+             chapters.append({
+                 'chapter_number': current_chapter['number'],
+                 'chapter_title': current_chapter['title'],
+                 'content': '\n'.join(current_content)
+             })
+
+         # If no chapters found, treat the entire content as one document
+         if not chapters:
+             print("No chapter structure found, processing as single document")
+             text_content = soup.get_text(separator='\n', strip=True)
+             chapters = [{
+                 'chapter_number': 0,
+                 'chapter_title': 'Harry Potter and the Methods of Rationality',
+                 'content': text_content
+             }]
+
+         print(f"Extracted {len(chapters)} chapters")
+         return chapters
+
+     def create_chunks(self, chapters: List[Dict]) -> List[Document]:
+         """Create overlapping chunks from chapters."""
+         print(f"Creating chunks with size={self.chunk_size}, overlap={self.chunk_overlap}")
+
+         documents = []
+         splitter = SentenceSplitter(
+             chunk_size=self.chunk_size,
+             chunk_overlap=self.chunk_overlap,
+         )
+
+         for chapter in chapters:
+             # Create a document for the chapter
+             chapter_doc = Document(
+                 text=chapter['content'],
+                 metadata={
+                     'chapter_number': chapter['chapter_number'],
+                     'chapter_title': chapter['chapter_title'],
+                     'source': 'hpmor.html'
+                 }
+             )
+
+             # Split into chunks
+             nodes = splitter.get_nodes_from_documents([chapter_doc])
+
+             # Convert nodes back to documents with enhanced metadata
+             for i, node in enumerate(nodes):
+                 doc = Document(
+                     text=node.text,
+                     metadata={
+                         **chapter_doc.metadata,
+                         'chunk_id': f"ch{chapter['chapter_number']}_chunk{i}",
+                         'chunk_index': i,
+                         'total_chunks_in_chapter': len(nodes)
+                     }
+                 )
+                 documents.append(doc)
+
+         print(f"Created {len(documents)} chunks total")
+         return documents
+
+     def save_processed_data(self, documents: List[Document], chapters: List[Dict]) -> None:
+         """Save processed documents and metadata to disk."""
+         # Save documents as JSON for easy loading
+         docs_data = []
+         for doc in documents:
+             docs_data.append({
+                 'text': doc.text,
+                 'metadata': doc.metadata
+             })
+
+         docs_file = self.processed_dir / 'documents.json'
+         with open(docs_file, 'w', encoding='utf-8') as f:
+             json.dump(docs_data, f, indent=2, ensure_ascii=False)
+         print(f"Saved {len(docs_data)} documents to {docs_file}")
+
+         # Save chapter metadata
+         chapters_file = self.processed_dir / 'chapters.json'
+         with open(chapters_file, 'w', encoding='utf-8') as f:
+             json.dump(chapters, f, indent=2, ensure_ascii=False)
+         print(f"Saved chapter metadata to {chapters_file}")
+
+     def load_processed_data(self) -> Optional[List[Document]]:
+         """Load previously processed documents."""
+         docs_file = self.processed_dir / 'documents.json'
+
+         if not docs_file.exists():
+             return None
+
+         with open(docs_file, 'r', encoding='utf-8') as f:
+             docs_data = json.load(f)
+
+         documents = []
+         for doc_data in docs_data:
+             doc = Document(
+                 text=doc_data['text'],
+                 metadata=doc_data['metadata']
+             )
+             documents.append(doc)
+
+         print(f"Loaded {len(documents)} documents from cache")
+         return documents
+
+     def process(self, force_reprocess: bool = False) -> List[Document]:
+         """Main processing pipeline."""
+         # Check if already processed
+         if not force_reprocess:
+             documents = self.load_processed_data()
+             if documents:
+                 return documents
+
+         # Process from scratch
+         print("Processing HPMOR document from scratch...")
+
+         if not config.hpmor_file.exists():
+             raise FileNotFoundError(f"HPMOR file not found: {config.hpmor_file}")
+
+         # Parse HTML
+         chapters = self.parse_html(config.hpmor_file)
+
+         # Create chunks
+         documents = self.create_chunks(chapters)
+
+         # Save processed data
+         self.save_processed_data(documents, chapters)
+
+         return documents
+
+
+ def main():
+     """Process HPMOR document."""
+     processor = HPMORProcessor()
+     documents = processor.process(force_reprocess=True)
+     print(f"\nProcessing complete! Created {len(documents)} document chunks.")
+
+     # Show sample
+     if documents:
+         print("\nSample chunk:")
+         print(f"Text: {documents[0].text[:200]}...")
+         print(f"Metadata: {documents[0].metadata}")
+
+
+ if __name__ == "__main__":
+     main()
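
With chunk_size=1000 and chunk_overlap=200, consecutive chunks advance by roughly 800 tokens each, so per-chapter chunk counts can be estimated up front. A back-of-the-envelope sketch (the SentenceSplitter respects sentence boundaries, so real counts drift slightly):

    import math

    def approx_chunk_count(n_tokens: int, size: int = 1000, overlap: int = 200) -> int:
        # Each chunk after the first covers size - overlap new tokens.
        if n_tokens <= size:
            return 1
        return math.ceil((n_tokens - overlap) / (size - overlap))

    print(approx_chunk_count(5000))  # -> 6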
src/model_chain.py ADDED
@@ -0,0 +1,322 @@
+ """Model chaining logic with Groq fallback."""
+
+ import re
+ from typing import Optional, Dict, Any
+ from enum import Enum
+
+ from llama_index.llms.ollama import Ollama
+ from litellm import completion
+ import httpx
+
+ from src.config import config
+
+
+ class ModelType(Enum):
+     """Model types for routing."""
+     LOCAL_SMALL = "local_small"
+     LOCAL_LARGE = "local_large"
+     GROQ_API = "groq"
+
+
+ class QueryComplexity(Enum):
+     """Query complexity levels."""
+     SIMPLE = "simple"        # Factual questions, definitions
+     MODERATE = "moderate"    # Analysis, reasoning
+     COMPLEX = "complex"      # Creative, multi-step reasoning
+
+
+ class ModelChain:
+     """Intelligent model routing with fallback to Groq."""
+
+     def __init__(self):
+         self.models = {}
+         self.groq_available = config.has_groq_api()
+
+         # Initialize models lazily
+         self._ollama_available = None
+
+     def check_ollama_available(self) -> bool:
+         """Check if Ollama is running and available."""
+         if self._ollama_available is not None:
+             return self._ollama_available
+
+         try:
+             # Try to connect to Ollama
+             response = httpx.get(f"{config.ollama_host}/api/tags", timeout=2.0)
+             self._ollama_available = response.status_code == 200
+             if self._ollama_available:
+                 print("Ollama is available")
+             else:
+                 print("Ollama is not responding correctly")
+         except Exception as e:
+             print(f"Ollama not available: {e}")
+             self._ollama_available = False
+
+         return self._ollama_available
+
+     def get_model(self, model_type: ModelType):
+         """Get or initialize a model.
+
+         Returns an Ollama LLM instance, the string "groq" (a sentinel meaning
+         "call Groq through litellm"), or None if the model is unavailable.
+         """
+         if model_type in self.models:
+             return self.models[model_type]
+
+         if model_type == ModelType.GROQ_API:
+             if not self.groq_available:
+                 print("Groq API key not configured")
+                 return None
+             return "groq"  # Sentinel for litellm usage
+
+         elif model_type in [ModelType.LOCAL_SMALL, ModelType.LOCAL_LARGE]:
+             if not self.check_ollama_available():
+                 print("Ollama not available, falling back to Groq")
+                 return None
+
+             model_config = config.get_model_config(model_type.value)
+             try:
+                 model = Ollama(
+                     model=model_config["model"],
+                     base_url=config.ollama_host,
+                     temperature=model_config["temperature"],
+                     request_timeout=120.0,
+                 )
+                 self.models[model_type] = model
+                 print(f"Initialized {model_type.value} model: {model_config['model']}")
+                 return model
+             except Exception as e:
+                 print(f"Failed to initialize Ollama model: {e}")
+                 return None
+
+         return None
+
+     def analyze_query_complexity(self, query: str, context_size: int = 0) -> QueryComplexity:
+         """Analyze query complexity to determine which model to use."""
+         query_lower = query.lower()
+
+         # Simple queries - factual questions
+         simple_patterns = [
+             r"what is",
+             r"who is",
+             r"when did",
+             r"where is",
+             r"define",
+             r"list",
+             r"name",
+             r"how many",
+             r"yes or no",
+         ]
+
+         # Complex queries - requiring reasoning or creativity
+         complex_patterns = [
+             r"explain why",
+             r"analyze",
+             r"compare and contrast",
+             r"what would happen if",
+             r"imagine",
+             r"create",
+             r"write a",
+             r"develop",
+             r"design",
+             r"evaluate",
+             r"critique",
+             r"synthesize",
+         ]
+
+         # Check for simple patterns
+         for pattern in simple_patterns:
+             if re.search(pattern, query_lower):
+                 return QueryComplexity.SIMPLE
+
+         # Check for complex patterns
+         for pattern in complex_patterns:
+             if re.search(pattern, query_lower):
+                 return QueryComplexity.COMPLEX
+
+         # Check query length and context size
+         if len(query.split()) > 50 or context_size > config.max_local_context_size:
+             return QueryComplexity.COMPLEX
+
+         # Default to moderate
+         return QueryComplexity.MODERATE
+
+     def route_query(
+         self,
+         query: str,
+         context: Optional[str] = None,
+         force_model: Optional[ModelType] = None
+     ) -> ModelType:
+         """Determine which model to use for the query."""
+         if force_model:
+             return force_model
+
+         context_size = len(context) if context else 0
+         complexity = self.analyze_query_complexity(query, context_size)
+
+         # Check Ollama availability
+         ollama_available = self.check_ollama_available()
+
+         # Routing logic
+         if complexity == QueryComplexity.SIMPLE:
+             if ollama_available:
+                 return ModelType.LOCAL_SMALL
+             elif self.groq_available:
+                 return ModelType.GROQ_API
+         elif complexity == QueryComplexity.MODERATE:
+             if ollama_available:
+                 return ModelType.LOCAL_LARGE
+             elif self.groq_available:
+                 return ModelType.GROQ_API
+         else:  # COMPLEX
+             if self.groq_available:
+                 return ModelType.GROQ_API
+             elif ollama_available:
+                 return ModelType.LOCAL_LARGE
+
+         # Final fallback
+         if self.groq_available:
+             return ModelType.GROQ_API
+         elif ollama_available:
+             return ModelType.LOCAL_SMALL
+         else:
+             raise RuntimeError("No models available! Please check Ollama or configure a Groq API key.")
+
+     def generate_response(
+         self,
+         query: str,
+         context: Optional[str] = None,
+         force_model: Optional[ModelType] = None,
+         stream: bool = False
+     ) -> Dict[str, Any]:
+         """Generate a response using the appropriate model."""
+         # Determine which model to use
+         model_type = self.route_query(query, context, force_model)
+         print(f"Using model: {model_type.value}")
+
+         # Prepare prompt
+         if context:
+             prompt = f"""Context from Harry Potter and the Methods of Rationality:
+ {context}
+
+ Question: {query}
+
+ Please provide a detailed answer based on the context provided above."""
+         else:
+             prompt = query
+
+         # Try primary model
+         try:
+             model = self.get_model(model_type)
+
+             if model == "groq":  # Special handling for Groq via litellm
+                 response = completion(
+                     model=f"groq/{config.groq_model}",
+                     messages=[{"role": "user", "content": prompt}],
+                     api_key=config.groq_api_key,
+                     temperature=0.7,
+                     max_tokens=2048,
+                     stream=stream
+                 )
+
+                 if stream:
+                     return {
+                         "response": response,
+                         "model_used": model_type.value,
+                         "streaming": True
+                     }
+                 else:
+                     return {
+                         "response": response.choices[0].message.content,
+                         "model_used": model_type.value,
+                         "tokens_used": response.usage.total_tokens if hasattr(response, 'usage') else None
+                     }
+
+             elif model:
+                 # Use LlamaIndex model
+                 if stream:
+                     response = model.stream_complete(prompt)
+                 else:
+                     response = model.complete(prompt)
+
+                 return {
+                     "response": response,
+                     "model_used": model_type.value,
+                     "streaming": stream
+                 }
+
+         except Exception as e:
+             print(f"Error with {model_type.value}: {e}")
+
+             # Try fallback
+             if model_type != ModelType.GROQ_API and self.groq_available:
+                 print("Falling back to Groq API...")
+                 model_type = ModelType.GROQ_API
+                 try:
+                     response = completion(
+                         model=f"groq/{config.groq_model}",
+                         messages=[{"role": "user", "content": prompt}],
+                         api_key=config.groq_api_key,
+                         temperature=0.7,
+                         max_tokens=2048,
+                         stream=stream
+                     )
+
+                     if stream:
+                         return {
+                             "response": response,
+                             "model_used": model_type.value,
+                             "streaming": True,
+                             "fallback": True
+                         }
+                     else:
+                         return {
+                             "response": response.choices[0].message.content,
+                             "model_used": model_type.value,
+                             "tokens_used": response.usage.total_tokens if hasattr(response, 'usage') else None,
+                             "fallback": True
+                         }
+                 except Exception as e2:
+                     print(f"Fallback to Groq also failed: {e2}")
+                     raise RuntimeError(f"All models failed. Last error: {e2}")
+
+         raise RuntimeError("No models available for response generation")
+
+
+ def main():
+     """Test model chaining."""
+     chain = ModelChain()
+
+     # Test queries of different complexities
+     test_queries = [
+         ("What is Harry's full name?", QueryComplexity.SIMPLE),
+         ("Explain Harry's reasoning about magic", QueryComplexity.MODERATE),
+         ("Analyze the philosophical implications of Harry's scientific approach to magic", QueryComplexity.COMPLEX),
+     ]
+
+     for query, expected_complexity in test_queries:
+         print(f"\nQuery: {query}")
+         complexity = chain.analyze_query_complexity(query)
+         print(f"Detected complexity: {complexity}")
+         print(f"Expected complexity: {expected_complexity}")
+
+         try:
+             model_type = chain.route_query(query)
+             print(f"Selected model: {model_type.value}")
+
+             # Generate response
+             result = chain.generate_response(query)
+             print(f"Model used: {result['model_used']}")
+             print(f"Response preview: {str(result['response'])[:200]}...")
+         except Exception as e:
+             print(f"Error: {e}")
+
+
+ if __name__ == "__main__":
+     main()
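
The complexity heuristics are pure string matching, so they can be exercised offline without Ollama or a Groq key. A quick sketch:

    from src.model_chain import ModelChain

    chain = ModelChain()
    print(chain.analyze_query_complexity("What is a Patronus?"))      # SIMPLE ("what is")
    print(chain.analyze_query_complexity("Analyze Harry's bargain"))  # COMPLEX ("analyze")
    print(chain.analyze_query_complexity("Tell me about Azkaban"))    # MODERATE (no pattern hit)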
src/rag_engine.py ADDED
@@ -0,0 +1,231 @@
+ """RAG query engine for HPMOR Q&A system."""
+
+ import json
+ from typing import Optional, List, Dict, Any
+
+ from src.document_processor import HPMORProcessor
+ from src.vector_store import VectorStoreManager
+ from src.model_chain import ModelChain, ModelType
+ from src.config import config
+
+
+ class RAGEngine:
+     """Main RAG engine combining retrieval and generation."""
+
+     def __init__(self, force_recreate: bool = False):
+         """Initialize RAG engine components."""
+         print("Initializing RAG Engine...")
+
+         # Initialize components
+         self.processor = HPMORProcessor()
+         self.vector_store = VectorStoreManager()
+         self.model_chain = ModelChain()
+
+         # Process and index documents
+         self._initialize_index(force_recreate)
+
+         # Cache for responses
+         self.response_cache = {}
+
+     def _initialize_index(self, force_recreate: bool = False):
+         """Initialize or load the vector index."""
+         # Process documents
+         documents = self.processor.process(force_reprocess=force_recreate)
+
+         # Create or load index
+         self.index = self.vector_store.get_or_create_index(
+             documents=documents,
+             force_recreate=force_recreate
+         )
+
+         print(f"Index ready with {len(documents)} documents")
+
+     def retrieve_context(self, query: str, top_k: Optional[int] = None) -> tuple[str, List[Dict]]:
+         """Retrieve relevant context for a query."""
+         if top_k is None:
+             top_k = config.top_k_retrieval
+
+         # Query vector store
+         nodes = self.vector_store.query(query, top_k=top_k)
+
+         # Format context
+         context_parts = []
+         source_info = []
+
+         for i, node in enumerate(nodes, 1):
+             # Add to context
+             context_parts.append(f"[Excerpt {i}]\n{node.text}")
+
+             # Collect source info
+             source_info.append({
+                 "chunk_id": node.metadata.get("chunk_id", "unknown"),
+                 "chapter_number": node.metadata.get("chapter_number", 0),
+                 "chapter_title": node.metadata.get("chapter_title", "Unknown"),
+                 "score": float(node.score) if node.score else 0.0,
+                 "text_preview": node.text[:200] + "..." if len(node.text) > 200 else node.text
+             })
+
+         context = "\n\n".join(context_parts)
+         return context, source_info
+
+     def query(
+         self,
+         question: str,
+         top_k: Optional[int] = None,
+         force_model: Optional[ModelType] = None,
+         return_sources: bool = True,
+         use_cache: bool = True,
+         stream: bool = False
+     ) -> Dict[str, Any]:
+         """Execute RAG query with retrieval and generation."""
+         # Check cache
+         cache_key = f"{question}_{top_k}_{force_model}"
+         if use_cache and cache_key in self.response_cache and not stream:
+             print("Returning cached response")
+             return self.response_cache[cache_key]
+
+         # Retrieve context
+         print(f"Retrieving context for: {question[:100]}...")
+         context, sources = self.retrieve_context(question, top_k)
+
+         # Generate response
+         print("Generating response...")
+         try:
+             result = self.model_chain.generate_response(
+                 query=question,
+                 context=context,
+                 force_model=force_model,
+                 stream=stream
+             )
+
+             # Prepare full response
+             full_response = {
+                 "question": question,
+                 "answer": result.get("response"),
+                 "model_used": result.get("model_used"),
+                 "sources": sources if return_sources else None,
+                 "context_size": len(context),
+                 "streaming": stream,
+                 "fallback_used": result.get("fallback", False)
+             }
+
+             # Cache if not streaming
+             if use_cache and not stream:
+                 self.response_cache[cache_key] = full_response
+
+             return full_response
+
+         except Exception as e:
+             print(f"Error generating response: {e}")
+             return {
+                 "question": question,
+                 "answer": f"Error generating response: {str(e)}",
+                 "model_used": None,
+                 "sources": sources if return_sources else None,
+                 "error": str(e)
+             }
+
+     def chat(
+         self,
+         messages: List[Dict[str, str]],
+         stream: bool = False,
+         force_model: Optional[ModelType] = None,
+         top_k: Optional[int] = None
+     ) -> Dict[str, Any]:
+         """Handle a chat conversation with context."""
+         # Get the latest user message
+         if not messages or messages[-1]["role"] != "user":
+             return {"error": "No user message found"}
+
+         current_question = messages[-1]["content"]
+
+         # Build conversation context if multiple messages
+         conversation_context = ""
+         if len(messages) > 1:
+             prev_messages = messages[:-1][-4:]  # Keep last 4 messages for context
+             for msg in prev_messages:
+                 role = "Human" if msg["role"] == "user" else "Assistant"
+                 conversation_context += f"{role}: {msg['content']}\n\n"
+
+         # Modify question to include conversation context
+         if conversation_context:
+             full_query = f"""Previous conversation:
+ {conversation_context}
+
+ Current question: {current_question}"""
+         else:
+             full_query = current_question
+
+         # Execute RAG query, passing through any caller overrides
+         response = self.query(
+             question=full_query,
+             top_k=top_k,
+             force_model=force_model,
+             return_sources=True,
+             stream=stream
+         )
+
+         return response
+
+     def get_stats(self) -> Dict[str, Any]:
+         """Get statistics about the RAG engine."""
+         vector_stats = self.vector_store.get_stats()
+
+         stats = {
+             "vector_store": vector_stats,
+             "cache_size": len(self.response_cache),
+             "models_available": {
+                 "ollama": self.model_chain.check_ollama_available(),
+                 "groq": self.model_chain.groq_available
+             }
+         }
+
+         return stats
+
+     def clear_cache(self):
+         """Clear the response cache."""
+         self.response_cache = {}
+         print("Response cache cleared")
+
+
+ def main():
+     """Test RAG engine."""
+     # Initialize engine
+     print("Initializing RAG engine...")
+     engine = RAGEngine(force_recreate=False)
+
+     # Test queries
+     test_questions = [
+         "What is Harry Potter's approach to understanding magic?",
+         "How does Harry react when he first learns about magic?",
+         "What are Harry's thoughts on the scientific method?",
+     ]
+
+     for question in test_questions:
+         print(f"\n{'='*80}")
+         print(f"Question: {question}")
+         print(f"{'='*80}")
+
+         response = engine.query(question, top_k=3)
+
+         print(f"\nModel used: {response['model_used']}")
+         print(f"Context size: {response['context_size']} characters")
+
+         if response.get("fallback_used"):
+             print("(Fallback to Groq was used)")
+
+         print(f"\nAnswer:\n{response['answer']}")
+
+         if response.get("sources"):
+             print(f"\nSources ({len(response['sources'])} chunks):")
+             for i, source in enumerate(response['sources'], 1):
+                 print(f"  {i}. Chapter {source['chapter_number']}: {source['chapter_title']}")
+                 print(f"     Score: {source['score']:.4f}")
+
+     # Show stats
+     print(f"\n{'='*80}")
+     print("Engine Statistics:")
+     stats = engine.get_stats()
+     print(json.dumps(stats, indent=2))
+
+
+ if __name__ == "__main__":
+     main()
src/vector_store.py ADDED
@@ -0,0 +1,198 @@
+ """Vector store management for document embeddings."""
+
+ from typing import List, Optional
+
+ import chromadb
+ from chromadb.config import Settings
+ from llama_index.core import Document, VectorStoreIndex, StorageContext
+ from llama_index.vector_stores.chroma import ChromaVectorStore
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+ from src.config import config
+
+
+ class VectorStoreManager:
+     """Manage ChromaDB vector store for document embeddings."""
+
+     def __init__(self):
+         self.collection_name = config.collection_name
+         self.persist_dir = str(config.chroma_persist_dir)
+         self.embedding_model = config.embedding_model
+
+         # Initialize embedding model
+         print(f"Loading embedding model: {self.embedding_model}")
+         self.embed_model = HuggingFaceEmbedding(
+             model_name=self.embedding_model,
+             cache_folder="./models"
+         )
+
+         # Initialize ChromaDB client
+         self.chroma_client = chromadb.PersistentClient(
+             path=self.persist_dir,
+             settings=Settings(anonymized_telemetry=False)
+         )
+
+         # Get or create collection
+         self.collection = None
+         self.vector_store = None
+         self.index = None
+
+     def initialize_collection(self, reset: bool = False) -> None:
+         """Initialize the ChromaDB collection."""
+         if reset:
+             # Delete existing collection if it exists
+             try:
+                 self.chroma_client.delete_collection(name=self.collection_name)
+                 print(f"Deleted existing collection: {self.collection_name}")
+             except Exception:
+                 pass
+
+         # Create or get collection
+         self.collection = self.chroma_client.get_or_create_collection(
+             name=self.collection_name,
+             metadata={"hnsw:space": "cosine"}
+         )
+         print(f"Using collection: {self.collection_name}")
+
+         # Initialize vector store; embeddings are supplied to the index via
+         # embed_model, so no embedding function is attached here
+         self.vector_store = ChromaVectorStore(
+             chroma_collection=self.collection
+         )
+
+     def create_index(self, documents: List[Document], show_progress: bool = True) -> VectorStoreIndex:
+         """Create vector index from documents."""
+         if not self.vector_store:
+             self.initialize_collection()
+
+         print(f"Creating index from {len(documents)} documents...")
+
+         # Create storage context
+         storage_context = StorageContext.from_defaults(
+             vector_store=self.vector_store
+         )
+
+         # Create index with documents
+         self.index = VectorStoreIndex.from_documents(
+             documents,
+             storage_context=storage_context,
+             embed_model=self.embed_model,
+             show_progress=show_progress
+         )
+
+         print("Index created successfully!")
+         return self.index
+
+     def load_index(self) -> Optional[VectorStoreIndex]:
+         """Load existing index from storage."""
+         if not self.vector_store:
+             self.initialize_collection()
+
+         # Check if collection has data
+         if self.collection.count() == 0:
+             print("No existing index found in ChromaDB")
+             return None
+
+         print(f"Loading index with {self.collection.count()} vectors")
+
+         # Create storage context
+         storage_context = StorageContext.from_defaults(
+             vector_store=self.vector_store
+         )
+
+         # Load index
+         self.index = VectorStoreIndex.from_vector_store(
+             self.vector_store,
+             storage_context=storage_context,
+             embed_model=self.embed_model
+         )
+
+         return self.index
+
+     def get_or_create_index(
+         self,
+         documents: Optional[List[Document]] = None,
+         force_recreate: bool = False
+     ) -> VectorStoreIndex:
+         """Get existing index or create a new one."""
+         if not force_recreate:
+             # Try to load existing index
+             index = self.load_index()
+             if index:
+                 return index
+
+         # Create new index
+         if not documents:
+             raise ValueError("No documents provided for creating index")
+
+         self.initialize_collection(reset=True)
+         return self.create_index(documents)
+
+     def query(self, query_text: str, top_k: Optional[int] = None) -> List:
+         """Query the vector store."""
+         if not self.index:
+             raise ValueError("Index not initialized. Call get_or_create_index first.")
+
+         if top_k is None:
+             top_k = config.top_k_retrieval
+
+         # Use the retriever directly instead of a query engine to avoid the LLM requirement
+         retriever = self.index.as_retriever(
+             similarity_top_k=top_k
+         )
+
+         # Retrieve nodes
+         nodes = retriever.retrieve(query_text)
+         return nodes
+
+     def get_stats(self) -> dict:
+         """Get statistics about the vector store."""
+         if not self.collection:
+             self.initialize_collection()
+
+         stats = {
+             "collection_name": self.collection_name,
+             "persist_dir": self.persist_dir,
+             "embedding_model": self.embedding_model,
+             "num_vectors": self.collection.count(),
+             "metadata": self.collection.metadata
+         }
+
+         return stats
+
+
+ def main():
+     """Test vector store functionality."""
+     from src.document_processor import HPMORProcessor
+
+     # Process documents
+     processor = HPMORProcessor()
+     documents = processor.process()
+
+     # Create vector store
+     vector_store = VectorStoreManager()
+     index = vector_store.get_or_create_index(documents, force_recreate=True)
+
+     # Get stats
+     stats = vector_store.get_stats()
+     print("\nVector Store Statistics:")
+     for key, value in stats.items():
+         print(f"  {key}: {value}")
+
+     # Test query
+     test_query = "What is Harry's opinion on magic?"
+     print(f"\nTest query: '{test_query}'")
+     results = vector_store.query(test_query, top_k=3)
+
+     print(f"\nFound {len(results)} relevant chunks:")
+     for i, node in enumerate(results, 1):
+         print(f"\n{i}. Score: {node.score:.4f}")
+         print(f"   Chapter: {node.metadata.get('chapter_title', 'Unknown')}")
+         print(f"   Text preview: {node.text[:200]}...")
+
+
+ if __name__ == "__main__":
+     main()
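
The persisted collection can also be inspected with the chromadb client directly, bypassing LlamaIndex; a sketch using the default paths from .env:

    import chromadb

    client = chromadb.PersistentClient(path="./chroma_db")
    collection = client.get_or_create_collection("hpmor_collection")
    print(collection.count())  # number of stored embeddings
    print(collection.peek(1))  # one raw record: ids, embeddings, documents, metadatas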
uv.lock ADDED
The diff for this file is too large to render.