Spaces:

deenaik
/

hpmor

Build error

App Files Files Community

deenaik committed on Oct 15, 2025

Commit

d84fcd9

1 Parent(s): 33bbab3

Deploy: HPMOR Q&A chatbot - 2025-10-15

Browse files

Files changed (5) hide show

.env.example +19 -0
.gitignore +17 -1
README.md +0 -0
app.py +221 -0
requirements.txt +12 -0

.env.example ADDED Viewed

	@@ -0,0 +1,19 @@

+# Groq API Configuration
+GROQ_API_KEY=your_groq_api_key_here
+GROQ_MODEL=llama-3.3-70b-versatile
+# Embedding Model
+EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+# Processing Parameters
+CHUNK_SIZE=1000
+CHUNK_OVERLAP=200
+TOP_K_RETRIEVAL=5
+# ChromaDB Settings
+CHROMA_PERSIST_DIR=./chroma_db
+COLLECTION_NAME=hpmor_collection
+# Gradio Settings
+GRADIO_SERVER_PORT=7860
+GRADIO_SHARE=False

.gitignore CHANGED Viewed

@@ -9,7 +9,23 @@ wheels/
 # Virtual environments
 .venv
-# Database
 chroma_db/
 blobs/
 models/

 # Virtual environments
 .venv
+# Environment variables (keep .env.example)
+.env
+# Database (will be rebuilt on deployment)
 chroma_db/
 blobs/
+# Downloaded models (embeddings will be re-downloaded)
 models/
+# Keep data files - they are needed for deployment!
+# data/ is NOT ignored
+# data/raw/hpmor.html is required
+# data/processed/ can be regenerated but it's fine to include
+# MacOS
+.DS_Store
+# UV lock file (optional - can be regenerated)
+# uv.lock

README.md CHANGED Viewed

Binary files a/README.md and b/README.md differ

app.py ADDED Viewed

	@@ -0,0 +1,221 @@

+#!/usr/bin/env python3
+"""HuggingFace Spaces app for HPMOR Q&A System - Cloud deployment version."""
+import os
+import sys
+import gradio as gr
+from typing import List, Tuple
+from pathlib import Path
+# Add src to path
+sys.path.insert(0, str(Path(__file__).parent))
+from src.config import config
+from src.document_processor import HPMORProcessor
+from src.vector_store import VectorStoreManager
+from src.rag_engine import RAGEngine
+# Force Groq-only mode for cloud deployment.
+# NOTE(review): this assignment runs after the `src.*` imports above; if any of
+# those modules read FORCE_GROQ_ONLY at import time they will not see it — confirm.
+os.environ["FORCE_GROQ_ONLY"] = "1"
+class HFChatInterface:
+    """Simplified chat interface for HuggingFace Spaces.
+
+    Owns a ``RAGEngine`` instance and translates Gradio pair-style chat
+    history into the role/content message list passed to ``engine.chat``.
+    """
+    # Upper bound on how many retrieved sources are rendered under an answer.
+    MAX_SOURCES = 3
+    # Number of characters of each source preview that are displayed.
+    PREVIEW_CHARS = 100
+    def __init__(self):
+        """Initialize the chat interface.
+
+        Runs the one-time setup when ``documents.json`` is missing from the
+        configured processed-data directory, then creates the RAG engine.
+        """
+        print("Initializing HPMOR Q&A Chat Interface for HuggingFace Spaces...")
+        # Check if we need to setup
+        processed_docs = config.processed_data_dir / "documents.json"
+        if not processed_docs.exists():
+            print("Setting up system for first time...")
+            self.setup_system()
+        # Initialize RAG engine (Groq-only mode)
+        self.engine = RAGEngine(force_recreate=False)
+        print("System ready!")
+    def setup_system(self):
+        """Set up the HPMOR Q&A system.
+
+        Processes the source document into chunks and hands them to the
+        vector store manager so the index exists before the engine starts.
+        """
+        print("Processing HPMOR document...")
+        processor = HPMORProcessor()
+        documents = processor.process(force_reprocess=False)
+        print(f"Processed {len(documents)} chunks")
+        print("Creating vector index...")
+        vector_store = VectorStoreManager()
+        vector_store.get_or_create_index(documents, force_recreate=False)
+        print("Setup complete!")
+    def format_sources(self, sources: List[dict]) -> str:
+        """Format sources for display.
+
+        Args:
+            sources: Source dicts; each must provide ``chapter_number``,
+                ``chapter_title``, ``score`` and ``text_preview``.
+
+        Returns:
+            Markdown for at most ``MAX_SOURCES`` sources separated by blank
+            lines, or an empty string when ``sources`` is empty.
+        """
+        if not sources:
+            return ""
+        return "\n\n".join(
+            f"**Source {i}** - Chapter {source['chapter_number']}: {source['chapter_title']}\n"
+            f"Relevance: {source['score']:.2f}\n"
+            f"*{source['text_preview'][:self.PREVIEW_CHARS]}...*"
+            for i, source in enumerate(sources[:self.MAX_SOURCES], 1)
+        )
+    def process_message(
+        self,
+        message: str,
+        history: List[List[str]],
+        show_sources: bool
+    ) -> Tuple[str, str]:
+        """Process a chat message and return response.
+
+        Args:
+            message: The user's new question.
+            history: Earlier turns as ``[user, assistant]`` pairs; ``None`` is
+                treated as an empty history.
+            show_sources: Whether to append formatted sources to the answer.
+
+        Returns:
+            ``(text, "")`` where ``text`` is the answer, an error notice, or a
+            prompt to enter a question. The second element is always empty and
+            is kept only so the return shape stays unchanged for callers.
+        """
+        # The user-visible text always goes in the first slot: callers display
+        # only that element, so a prompt placed in the second slot was lost.
+        if not message or not message.strip():
+            return "Please enter a question.", ""
+        # Convert history to messages format
+        messages = []
+        for user_msg, assistant_msg in history or []:
+            if user_msg:
+                messages.append({"role": "user", "content": user_msg})
+            if assistant_msg:
+                messages.append({"role": "assistant", "content": assistant_msg})
+        messages.append({"role": "user", "content": message})
+        try:
+            # Get response from engine
+            response = self.engine.chat(messages, stream=False)
+            # Extract answer; a missing or None answer falls back to a fixed
+            # notice instead of being rendered as the literal text "None".
+            raw_answer = response.get("answer")
+            if raw_answer is None:
+                answer = "No response generated"
+            else:
+                answer = raw_answer if isinstance(raw_answer, str) else str(raw_answer)
+            # Format sources if requested
+            if show_sources and response.get("sources"):
+                sources_text = "\n\n---\n\n**📚 Sources from HPMOR:**\n\n" + self.format_sources(response["sources"])
+                answer = answer + sources_text
+            return answer, ""
+        except Exception as e:
+            # UI boundary: report the failure in the chat rather than crashing the app.
+            error_msg = f"I apologize, but I encountered an error: {str(e)}\n\nPlease make sure the Groq API key is properly configured."
+            return error_msg, ""
+def create_interface() -> gr.Blocks:
+    """Create the Gradio interface.
+
+    Builds the chat backend, lays out the chat window, settings column and
+    example questions, and wires both the textbox submit event and the send
+    button to the same handler.
+
+    Returns:
+        The assembled ``gr.Blocks`` app, not yet launched.
+    """
+    # Initialize chat interface
+    chat_interface = HFChatInterface()
+    with gr.Blocks(title="Chat with Harry Potter-Evans-Verres", theme=gr.themes.Soft()) as interface:
+        gr.Markdown(
+            """
+            # 🧙‍♂️ Chat with Harry James Potter-Evans-Verres
+            Hello! I'm Harry Potter-Evans-Verres from "Harry Potter and the Methods of Rationality."
+            Ask me anything about my adventures, experiments with magic, or my thoughts on rationality and science.
+            I'll respond based on my experiences and the scientific method, of course!
+            *Powered by RAG with ChromaDB and Groq API (llama-3.3-70b-versatile)*
+            """
+        )
+        with gr.Row():
+            with gr.Column(scale=3):
+                # avatar_images is documented as file paths or URLs; the emoji
+                # string passed previously is neither, so it is omitted here.
+                chatbot = gr.Chatbot(
+                    label="Chat",
+                    height=500,
+                    show_copy_button=True
+                )
+                with gr.Row():
+                    msg_input = gr.Textbox(
+                        label="Your Question",
+                        placeholder="Ask me anything... For example: 'What do you think about magic?' or 'Tell me about your experiments'",
+                        lines=2,
+                        scale=4
+                    )
+                    submit_btn = gr.Button("Send 📨", variant="primary", scale=1)
+            with gr.Column(scale=1):
+                gr.Markdown("### ⚙️ Settings")
+                show_sources = gr.Checkbox(
+                    value=True,
+                    label="Show Sources from Book"
+                )
+                gr.Markdown(
+                    """
+                    ### 💡 Tips
+                    - Ask about Harry's experiments
+                    - Inquire about his views on magic
+                    - Ask about other characters
+                    - Request explanations of events
+                    """
+                )
+        # Example questions
+        gr.Examples(
+            examples=[
+                "Harry, how did you first react when you learned magic was real?",
+                "What's your opinion on the way Hogwarts teaches magic?",
+                "Can you explain your scientific experiments with magic?",
+                "What do you think about Hermione?",
+                "How do you apply rationality to magical problems?",
+                "What's your relationship with Professor Quirrell like?",
+            ],
+            inputs=msg_input,
+            label="💬 Example Questions"
+        )
+        # Event handlers
+        def respond(message, history, sources):
+            """Handle message submission.
+
+            Returns the cleared textbox value and the updated history. Blank
+            input leaves the conversation untouched instead of adding an
+            empty turn.
+            """
+            # Work on a copy so the list handed in by Gradio is not mutated.
+            updated_history = list(history or [])
+            if not message or not message.strip():
+                return "", updated_history
+            answer, _ = chat_interface.process_message(message, updated_history, sources)
+            updated_history.append([message, answer])
+            return "", updated_history
+        # Pressing Enter in the textbox and clicking the button behave the same.
+        for trigger in (msg_input.submit, submit_btn.click):
+            trigger(
+                respond,
+                inputs=[msg_input, chatbot, show_sources],
+                outputs=[msg_input, chatbot]
+            )
+        gr.Markdown(
+            """
+            ---
+            **About:** This chatbot uses Retrieval-Augmented Generation (RAG) to answer questions
+            based on "Harry Potter and the Methods of Rationality" by Eliezer Yudkowsky.
+            **Note:** Requires a Groq API key. Get one free at [console.groq.com](https://console.groq.com/)
+            """
+        )
+    return interface
+if __name__ == "__main__":
+    # A missing key is only warned about here; the app still starts so the
+    # Space comes up and the secret can be added afterwards.
+    groq_api_key = os.getenv("GROQ_API_KEY")
+    if not groq_api_key:
+        for warning_line in (
+            "WARNING: GROQ_API_KEY not found in environment variables!",
+            "Please set it in your HuggingFace Space secrets.",
+        ):
+            print(warning_line)
+    # Build the UI, then serve it on all interfaces at the fixed port.
+    launch_options = {
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "share": False,
+    }
+    interface = create_interface()
+    interface.launch(**launch_options)

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+chromadb==1.1.1
+gradio==5.49.1
+httpx==0.28.1
+huggingface-hub==0.35.3
+langchain==0.3.27
+langchain-groq==0.3.8
+llama-index==0.14.4
+llama-index-embeddings-huggingface==0.6.1
+llama-index-llms-groq==0.4.1
+llama-index-vector-stores-chroma==0.5.3
+lxml==6.0.2
+sentence-transformers