ashish-ninehertz commited on
Commit
944bdbc
·
1 Parent(s): e379072
.gitignore ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual environment
7
+ venv/
8
+ .env
9
+ .venv/
10
+
11
+ .history/
12
+
13
+
14
+ # Jupyter Notebook checkpoints
15
+ .ipynb_checkpoints
16
+
17
+ # VS Code settings
18
+ .vscode/
19
+
20
+ # OS files
21
+ .DS_Store
22
+ Thumbs.db
23
+
24
+ # Logs
25
+ *.log
26
+
27
+ # Environment variable files
28
+ .env
29
+ .env.*
30
+
31
+ # Data and cache
32
+ data/
33
+ *.sqlite3
34
+ *.db
35
+
36
+ # Python egg files
37
+ *.egg
38
+ *.egg-info/
39
+ dist/
40
+ build/
41
+ .eggs/
42
+
43
+ # Qdrant local storage (if running locally)
.gitattributes → CrawlyBot/.gitattributes RENAMED
File without changes
CrawlyBot/README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CrawlyBot
3
+ emoji: 🏢
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 5.34.2
8
+ app_file: app.py
9
+ pinned: false
10
+ license: other
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
README.md CHANGED
@@ -1,13 +1,10 @@
1
  ---
2
- title: CrawlyBot
3
- emoji: 🏢
4
- colorFrom: blue
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 5.34.2
8
- app_file: app.py
9
  pinned: false
10
- license: other
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Buddy Your Bot – RAG Chatbot
3
+ emoji: 🤖
4
+ colorFrom: indigo
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: "4.25.0"
8
+ app_file: app/web/gradio_app.py
9
  pinned: false
10
+ ---
 
 
 
app ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 8b9665bbfca64069c80341de7a68c92b6d066bd1
app.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import uuid
3
+ import logging
4
+ from typing import List, Tuple
5
+ from app.main import RAGSystem
6
+ import asyncio
7
+
8
# Configure logging for the whole app; module-level logger per stdlib convention.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize the RAG system
# NOTE(review): constructed at import time — any heavy setup inside
# RAGSystem() runs as soon as this module is imported; confirm that is
# acceptable for the deployment runtime.
rag = RAGSystem()
14
+
15
def create_session() -> str:
    """Mint a fresh, globally unique session identifier (UUID4 as text)."""
    new_id = uuid.uuid4()
    return str(new_id)
18
+
19
def index_website(url: str, session_id: str) -> Tuple[bool, str]:
    """Crawl and index *url* under *session_id*.

    Returns a pair ``(ok, message)`` where *message* is a human-readable
    status suitable for direct display in the UI.
    """
    try:
        outcome = rag.crawl_and_index(session_id, url)
        if outcome["status"] != "success":
            return False, outcome.get("message", "Unknown error during indexing")
        page_count = len(outcome.get('urls_processed', []))
        return True, f"Successfully indexed {page_count} pages"
    except Exception as e:
        # Surface the failure to the UI rather than crashing the handler.
        logger.error(f"Indexing error: {str(e)}")
        return False, f"Error during indexing: {str(e)}"
29
+
30
def chat_response(
    session_id: str,
    message: str,
    model_choice: str,
    ollama_url: str,
    gemini_api_key: str,
    chat_history: List[dict]
) -> Tuple[List[dict], str]:
    """Generate a chat response with proper error handling.

    Parameters mirror the Gradio inputs: the active session id, the typed
    message, the selected model name, per-model connection settings, and
    the running messages-format history. Returns the updated history plus
    an empty string so the message textbox is cleared.
    """
    # Record the user's turn exactly once, whatever happens next
    # (the original duplicated this append in three branches).
    chat_history.append({"role": "user", "content": f"🧑‍💻 {message}"})

    if not session_id:
        chat_history.append({"role": "assistant", "content": "🤖 Please index a website first or enter a valid session ID"})
        return chat_history, ""

    # Normalize once so the settings comparisons agree with the value
    # actually sent to rag.chat (the original compared the raw radio value
    # while sending model_choice.lower()).
    model = model_choice.lower()
    try:
        response = asyncio.run(rag.chat(
            session_id=session_id,
            question=message,
            model=model,
            ollama_url=ollama_url if model == "mistral" else None,
            gemini_api_key=gemini_api_key if model == "gemini" else None
        ))

        if response["status"] == "success":
            answer = response["response"]
            # Append a bulleted source list only when sources were returned.
            sources = "\n\nSources:\n" + "\n".join(
                f"- {src['source_url']}" for src in response.get("sources", [])
            ) if response.get("sources") else ""
            full_response = f"🤖 {answer}{sources}"
        else:
            full_response = f"🤖 Error: {response.get('message', 'Unknown error')}"
    except Exception as e:
        logger.error(f"Chat error: {str(e)}")
        full_response = f"🤖 System error: {str(e)}"

    chat_history.append({"role": "assistant", "content": full_response})
    return chat_history, ""
70
+
71
def toggle_model_inputs(model_choice: str) -> List[gr.update]:
    """Return visibility updates for (ollama_url, gemini_api_key).

    Exactly one of the two settings boxes is shown: the Ollama URL for
    "mistral", the API key box otherwise.
    """
    show_ollama = model_choice == "mistral"
    return [gr.update(visible=show_ollama), gr.update(visible=not show_ollama)]
76
+
77
def load_session(existing_session_id: str) -> Tuple[str, str]:
    """Resume a previously created session.

    Returns ``(session_id, status_message)``. An empty input yields an
    empty id plus an error hint. No existence check is performed here.
    """
    if not existing_session_id:
        return "", "Please enter a valid session ID"
    return existing_session_id, f"Loaded existing session: {existing_session_id}"
83
+
84
def get_session(self, session_id: str):
    """Return the in-memory session dict, rehydrating it from Qdrant if needed.

    NOTE(review): this function takes ``self`` but is defined at module
    level and is never called anywhere in this file — it appears to have
    been pasted from a class (presumably RAGSystem). It should live on
    that class; confirm and relocate.

    Raises:
        ValueError: when no documents are indexed for this session.
    """
    # If session exists in memory, return it
    if session_id in self.sessions:
        return self.sessions[session_id]
    # If not, check if Qdrant collection exists and has documents
    collection_name = self.get_collection_name(session_id)
    try:
        # scroll(limit=1) is the cheapest way to test for any stored point.
        results = self.qdrant_client.scroll(collection_name=collection_name, limit=1)
        if results and results[0]:
            # Rehydrate session in memory
            self.sessions[session_id] = {
                "documents": [],  # Optionally, you can fetch all docs if needed
                "history": []
            }
            return self.sessions[session_id]
    except Exception as e:
        # Missing collection (or any client error) is treated as "no session".
        logger.warning(f"Session {session_id} not found in Qdrant: {e}")
    # If not found, return None or raise
    raise ValueError("No documents indexed for this session")
103
+
104
# Custom CSS injected into the Gradio page: centered max-width layout,
# dark-mode background, colored left borders on chat bubbles, and gradient
# primary buttons. Selectors target Gradio's built-in class names.
custom_css = """
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto !important;
}
.dark .gradio-container {
    background: #1e1e2e !important;
}
#chatbot {
    min-height: 500px;
    border-radius: 12px !important;
}
.message.user {
    border-left: 4px solid #4f46e5 !important;
}
.message.assistant {
    border-left: 4px solid #10b981 !important;
}
.btn-primary {
    background: linear-gradient(to right, #4f46e5, #7c3aed) !important;
    border: none !important;
}
.btn-primary:hover {
    background: linear-gradient(to right, #4338ca, #6d28d9) !important;
}
.prose {
    max-width: 100% !important;
}
"""
134
+
135
# Top-level Gradio UI: two tabs (index a website, chat with it) sharing a
# session id held in gr.State.
with gr.Blocks(title="RAG Chat with Mistral/Gemini", css=custom_css, theme="soft") as demo:
    # Header section
    with gr.Row():
        gr.Markdown("""
        # 🌐 RAG Chat Assistant
        ### Chat with any website using Mistral or Gemini
        """)

    # Session state — empty string until a site is indexed or a session loaded.
    session_id = gr.State("")

    with gr.Tabs():
        with gr.TabItem("📚 Index Website"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Step 1: Configure and Index")
                    with gr.Group():
                        url_input = gr.Textbox(
                            label="Website URL to index",
                            placeholder="https://example.com",
                            interactive=True,
                            lines=1
                        )

                        with gr.Row():
                            model_choice = gr.Radio(
                                choices=["mistral", "gemini"],
                                label="Select Model",
                                value="mistral",
                                interactive=True
                            )

                            index_btn = gr.Button(
                                "🚀 Index Website",
                                variant="primary",
                                scale=0
                            )

                        with gr.Accordion("🔐 Model Settings", open=False):
                            # Only one of these two boxes is visible at a time;
                            # toggle_model_inputs swaps them on radio change.
                            ollama_url = gr.Textbox(
                                label="Ollama URL (required for Mistral)",
                                placeholder="http://localhost:11434",
                                visible=True
                            )

                            gemini_api_key = gr.Textbox(
                                label="Gemini API Key (required for Gemini)",
                                placeholder="your-api-key-here",
                                visible=False,
                                type="password"
                            )

                    status_output = gr.Textbox(
                        label="Status",
                        interactive=False,
                        elem_classes="prose"
                    )

                    gr.Markdown("""
                    **Instructions:**
                    1. Enter a website URL
                    2. Select your preferred model
                    3. Configure model settings if needed
                    4. Click 'Index Website'
                    """)

        with gr.TabItem("💬 Chat"):
            with gr.Row():
                with gr.Column(scale=2):
                    # New session ID input for resuming sessions
                    with gr.Accordion("🔍 Resume Previous Session", open=False):
                        existing_session_input = gr.Textbox(
                            label="Enter existing Session ID",
                            placeholder="Paste your session ID here...",
                            interactive=True
                        )
                        load_session_btn = gr.Button(
                            "🔁 Load Session",
                            variant="secondary"
                        )
                        session_status = gr.Textbox(
                            label="Session Status",
                            interactive=False
                        )

                    chatbot = gr.Chatbot(
                        label="Chat History",
                        height=500,
                        avatar_images=(None, None),
                        show_copy_button=True,
                        type="messages"  # Use the new format
                    )

                    with gr.Row():
                        message_input = gr.Textbox(
                            label="Type your message",
                            placeholder="Ask about the website content...",
                            interactive=True,
                            container=False,
                            scale=7,
                            autofocus=True
                        )

                        send_btn = gr.Button(
                            "Send",
                            variant="primary",
                            scale=1,
                            min_width=100
                        )

    # Event handlers
    # Swap the visibility of the two credential boxes when the model changes.
    model_choice.change(
        fn=toggle_model_inputs,
        inputs=model_choice,
        outputs=[ollama_url, gemini_api_key]
    )

    # Indexing chain: first mint a new session id, then crawl+index on success.
    # NOTE(review): index_website returns a (bool, str) tuple, but only one
    # output component (status_output) is wired here — confirm how Gradio
    # maps the tuple, or drop the boolean from the return value.
    index_btn.click(
        fn=create_session,
        outputs=session_id
    ).success(
        fn=index_website,
        inputs=[url_input, session_id],
        outputs=[status_output]
    )

    # New handler for loading existing sessions
    load_session_btn.click(
        fn=load_session,
        inputs=[existing_session_input],
        outputs=[session_id, session_status]
    )

    send_btn.click(
        fn=chat_response,
        inputs=[session_id, message_input, model_choice, ollama_url, gemini_api_key, chatbot],
        outputs=[chatbot, message_input]
    )

    # Allow submitting with Enter key (same handler as the Send button).
    message_input.submit(
        fn=chat_response,
        inputs=[session_id, message_input, model_choice, ollama_url, gemini_api_key, chatbot],
        outputs=[chatbot, message_input]
    )

if __name__ == "__main__":
    # NOTE(review): Gradio's favicon_path expects a local file path; a remote
    # URL here is likely ignored or rejected — confirm against the pinned
    # gradio version.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        favicon_path="https://www.gradio.app/assets/favicon.ico"
    )
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.68.0
2
+ uvicorn[standard]>=0.15.0
3
+ streamlit==1.32.0
4
+ requests==2.31.0
5
+ beautifulsoup4==4.12.3
6
+ python-dotenv==1.0.0
7
+ langchain>=0.1.0
8
+ langchain-community>=0.0.28
9
+ sentence-transformers>=2.2.0
10
+ ollama>=0.1.0
11
+ httpx==0.27.0
12
+ aiohttp==3.9.3
13
+ pydantic>=2.0.0
14
+ numpy>=1.21.0
15
+ websockets
16
+ qdrant-client>=1.1.0
17
+ python-multipart>=0.0.5
18
+ python-jose[cryptography]>=3.3.0
19
+ python-dateutil>=2.8.2
20
+ gradio>=4.0
setup.py ADDED
File without changes
tests/test_connection.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import socket
2
+ from qdrant_client import QdrantClient
3
+
4
def check_port(host, port):
    """Return True when a TCP connection to (host, port) succeeds."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex returns 0 on success instead of raising.
        return probe.connect_ex((host, port)) == 0
    finally:
        probe.close()
7
+
8
# --- ad-hoc connectivity check ---
# NOTE(review): this runs at import time, so pytest collection of this file
# will open sockets; consider guarding with `if __name__ == "__main__":`.
host = "localhost"
port = 6333  # Qdrant's default REST port

if check_port(host, port):
    print(f"Port {port} is open. Testing Qdrant API...")
    try:
        client = QdrantClient(host=host, port=port)
        print("Success! Collections:", client.get_collections())
    except Exception as e:
        # Port open but API unreachable (e.g. another service on 6333).
        print(f"API Error: {e}")
else:
    print(f"ERROR: Port {port} is closed. Check if Qdrant is running.")
tests/test_qdrant_integration.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from qdrant_client import QdrantClient
3
+ from qdrant_client.models import VectorParams, Distance
4
+
5
@pytest.fixture
def qdrant_client():
    """Client bound to a locally running Qdrant instance on the default port."""
    return QdrantClient(host="localhost", port=6333)

def test_collection_creation(qdrant_client):
    """Round-trip: (re)create a 384-dim cosine collection and confirm it exists."""
    test_collection = "test_collection"
    # NOTE(review): recreate_collection is deprecated in recent qdrant-client
    # releases in favor of delete_collection + create_collection — confirm
    # against the version actually pinned in requirements.txt (>=1.1.0).
    qdrant_client.recreate_collection(test_collection, vectors_config=VectorParams(size=384, distance=Distance.COSINE))
    assert qdrant_client.collection_exists(test_collection)
tests/test_storage.py ADDED
File without changes
tests/test_ws.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import websockets
3
+ import json
4
+
5
async def test_ws():
    """Smoke-test the local websocket endpoint with a single query payload."""
    uri = "ws://localhost:8000/ws/test-session"
    async with websockets.connect(uri) as ws:
        await ws.send(json.dumps({"query": "What is AI?"}))
        response = await ws.recv()
        print("Response:", response)

# NOTE(review): this executes at import time, so importing (or pytest
# collecting) this file performs network I/O; pytest will additionally try
# to collect `test_ws` as a coroutine test. Consider renaming the function
# or adopting pytest-asyncio and dropping this direct call.
asyncio.run(test_ws())