Spaces:

kerdosdotio
/

Custom-LLM-Chat

Running

Bhaskar Ram committed on Mar 4

Commit

3151380

1 Parent(s): 634117a

feat: model selector, progress bar, sources panel, chat export, CSV parsing, dockerignore

UI/UX Features:
- app.py: model selector dropdown (Llama 3.1 8B, Mistral 7B, Mixtral 8x7B, Qwen2.5 72B)
— switches active LLM per-request without server restart
- app.py: gr.Progress in process_files() with step labels (Parsing / Embedding / Done)
— no more silent 30s freeze on large uploads
- app.py: Retrieved Sources accordion below chat — shows each chunk's source file,
cosine score, score bar (█░ visual), and 220-char preview
- app.py: Chat export button — downloads conversation as timestamped Markdown file
- app.py: Max response tokens slider (128–4096, default 1024) in Settings panel
- app.py: CSS moved into Blocks(css=) to avoid duplicate arg on launch()

RAG Core:
- document_loader.py: CSV files now parsed with csv.DictReader into
'Column: value. Column: value.' natural-language sentences per row

Infra:
- .dockerignore: excludes .git, .env, __pycache__, tests, venv, .vscode,
sdk/ and FAISS snapshot files from Docker image

Files changed (3) hide show

.dockerignore +52 -0
app.py +162 -54
rag/document_loader.py +31 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,52 @@

+# Ignore files that should never go into the Docker image
+# Git internals
+.git
+.gitignore
+# Python cache & build artifacts
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+.Python
+*.egg-info/
+dist/
+build/
+.eggs/
+# Virtual environments
+.venv/
+venv/
+env/
+# Environment secrets — NEVER bake into image
+.env
+.env.*
+!.env.example
+# Dev dependencies and tooling
+requirements-dev.txt
+.pytest_cache/
+.ruff_cache/
+.mypy_cache/
+# Test files
+tests/
+# IDE / editor configs
+.vscode/
+.idea/
+*.swp
+*.swo
+# OS noise
+.DS_Store
+Thumbs.db
+# SDK (not needed in runtime image)
+sdk/
+# Saved FAISS index snapshots (user-local, not for containers)
+*.faiss
+*.pkl

app.py CHANGED Viewed

@@ -3,51 +3,65 @@ app.py — Enterprise Document Q&A (RAG)
 Powered by Llama 3 + FAISS + Sentence Transformers
 A Demo Product by Kerdos Infrasoft Private Limited
 Website: https://kerdos.in
 """
 import os
 from dotenv import load_dotenv
 import gradio as gr
 from rag.document_loader import load_documents
 from rag.embedder import build_index, add_to_index
 from rag.retriever import retrieve
 from rag.chain import answer_stream
-load_dotenv()  # Load HF_TOKEN etc. from .env when running locally
-# ─────────────────────────────────────────────
 # State helpers
-# ─────────────────────────────────────────────
 def get_hf_token(user_token: str) -> str:
-    """Prefer user-supplied token; fall back to Space secret."""
     t = user_token.strip() if user_token else ""
     return t or os.environ.get("HF_TOKEN", "")
-# ─────────────────────────────────────────────
 # Gradio handlers
-# ─────────────────────────────────────────────
-def process_files(files, current_index, indexed_sources):
-    """Parse uploaded files and build / extend the FAISS index.
-    Args:
-        files:           Uploaded file objects from gr.File.
-        current_index:   Existing VectorIndex state (None on first upload).
-        indexed_sources: Set of already-indexed filenames (duplicate guard).
-    """
     if not files:
         return current_index, indexed_sources, "⚠️ No files uploaded."
     file_paths = [f.name for f in files] if hasattr(files[0], "name") else files
-    # ── Duplicate guard ────────────────────────────────────────────────────
-    # Filter out files whose name is already in the knowledge base so that
-    # re-uploading the same document doesn't silently double the chunk count.
     new_paths, skipped = [], []
     for p in file_paths:
-        from pathlib import Path
         name = Path(p).name
         if name in indexed_sources:
             skipped.append(name)
@@ -58,13 +72,18 @@ def process_files(files, current_index, indexed_sources):
         return current_index, indexed_sources, (
             f"⚠️ Already indexed: {', '.join(skipped)}. No new documents added."
         )
-    # ──────────────────────────────────────────────────────────────────────
     docs = load_documents(new_paths)
     if not docs:
-        return current_index, indexed_sources, "❌ Could not extract text from the uploaded files. Please upload PDF, DOCX, or TXT files."
     try:
         if current_index is None:
             idx = build_index(docs)
@@ -73,6 +92,8 @@ def process_files(files, current_index, indexed_sources):
     except Exception as e:
         return current_index, indexed_sources, f"❌ Failed to build index: {e}"
     new_sources = {d["source"] for d in docs}
     updated_sources = indexed_sources | new_sources
     total_chunks = idx.index.ntotal
@@ -85,19 +106,19 @@ def process_files(files, current_index, indexed_sources):
     return idx, updated_sources, msg
-def chat(user_message, history, vector_index, hf_token_input, top_k):
-    """Streaming chat handler — yields progressively-updated history for real-time response."""
     if not user_message.strip():
-        yield history, ""
         return
     hf_token = get_hf_token(hf_token_input)
     if not hf_token:
         history = history + [
             {"role": "user", "content": user_message},
-            {"role": "assistant", "content": "⚠️ Please provide a Hugging Face API token to use the chat."},
         ]
-        yield history, ""
         return
     if vector_index is None:
@@ -105,32 +126,75 @@ def chat(user_message, history, vector_index, hf_token_input, top_k):
             {"role": "user", "content": user_message},
             {"role": "assistant", "content": "⚠️ Please upload at least one document first."},
         ]
-        yield history, ""
         return
     try:
         chunks = retrieve(user_message, vector_index, top_k=int(top_k))
-        # Append placeholder so user sees their message immediately
         history = history + [
             {"role": "user", "content": user_message},
             {"role": "assistant", "content": ""},
         ]
         for partial in answer_stream(user_message, chunks, hf_token, chat_history=history[:-2]):
             history[-1]["content"] = partial
-            yield history, ""
     except Exception as e:
         history[-1]["content"] = f"❌ Error: {e}"
-        yield history, ""
 def reset_all():
-    """Clear index, chat, and the indexed-sources tracker."""
-    return None, set(), [], "🗑️ Knowledge base and chat cleared.", ""
-# ─────────────────────────────────────────────
-# UI
-# ─────────────────────────────────────────────
 CSS = """
 /* ── Kerdos Brand Theme ── */
@@ -196,16 +260,19 @@ body { font-family: 'Segoe UI', Arial, sans-serif; }
     font-size: 0.82em;
     color: #888;
 }
-#title { text-align: center; }
 #subtitle { text-align: center; color: #6B8CFF; margin-bottom: 8px; }
 .upload-box { border: 2px dashed #0055FF !important; border-radius: 12px !important; }
 #status-box { font-size: 0.9em; }
 footer { display: none !important; }
 """
-with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as demo:
-    # ── Kerdos Header ─────────────────────────
     gr.HTML("""
     <div id="kerdos-header">
         <div id="kerdos-logo-line">
@@ -226,12 +293,10 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as dem
             &nbsp;|&nbsp;
             📞 <a href="https://kerdos.in/contact" target="_blank" style="color:#00C2FF; text-decoration:none;">Contact Us</a>
         </div>
         <div id="kerdos-demo-banner">
             ⚠️ <strong style="color:#FFA000;">This is a Demo Version.</strong>
             <span style="color:#FFD080;"> Features, model selection, and customisation are limited. The full product will support private, on-premise LLM deployments tailored to your organisation.</span>
         </div>
         <div id="kerdos-fund-banner">
             🚀 <strong style="color:#00C2FF;">We are actively seeking investment &amp; partnerships</strong>
             <span style="color:#A0C8FF;"> to build the <em>fully customisable</em> enterprise edition — including <strong>private LLM hosting</strong>, custom model fine-tuning, data privacy guarantees, and white-label deployments.</span>
@@ -249,12 +314,12 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as dem
         elem_id="subtitle",
     )
-    # ── Shared state ─────────────────────────
     vector_index = gr.State(None)
-    indexed_sources = gr.State(set())   # tracks filenames already in the index
     with gr.Row():
-        # ── Left panel: Upload + config ──────
         with gr.Column(scale=1, min_width=300):
             gr.Markdown("### 📂 Upload Documents")
             file_upload = gr.File(
@@ -278,16 +343,34 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as dem
                 type="password",
                 value="",
             )
             top_k_slider = gr.Slider(
                 minimum=1, maximum=10, value=5, step=1,
                 label="Chunks to retrieve (top-K)",
             )
             reset_btn = gr.Button("🗑️ Clear All", variant="stop")
-        # ── Right panel: Chat ─────────────────
         with gr.Column(scale=2):
             gr.Markdown("### 💬 Ask Questions")
-            chatbot = gr.Chatbot(height=460, show_label=False)
             with gr.Row():
                 user_input = gr.Textbox(
                     placeholder="Ask a question about your documents...",
@@ -297,18 +380,31 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as dem
                 )
                 send_btn = gr.Button("Send ▶", variant="primary", scale=1)
-    # ── Examples ─────────────────────────────
     gr.Examples(
         examples=[
             ["What is the refund policy?"],
             ["Summarize the key points of this document."],
             ["What are the terms of service?"],
             ["Who is the contact person for support?"],
         ],
         inputs=user_input,
     )
-    # ── Event wiring ──────────────────────────
     index_btn.click(
         fn=process_files,
         inputs=[file_upload, vector_index, indexed_sources],
@@ -317,23 +413,35 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as dem
     send_btn.click(
         fn=chat,
-        inputs=[user_input, chatbot, vector_index, hf_token_input, top_k_slider],
-        outputs=[chatbot, user_input],
     )
     user_input.submit(
         fn=chat,
-        inputs=[user_input, chatbot, vector_index, hf_token_input, top_k_slider],
-        outputs=[chatbot, user_input],
     )
     reset_btn.click(
         fn=reset_all,
         inputs=[],
-        outputs=[vector_index, indexed_sources, chatbot, status_box, user_input],
     )
-    # ── Kerdos Footer ─────────────────────────
     gr.HTML("""
     <div id="kerdos-footer">
         &copy; 2024–2026 <strong>Kerdos Infrasoft Private Limited</strong> &nbsp;|&nbsp;
@@ -348,5 +456,5 @@ with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo") as dem
     """)
 if __name__ == "__main__":
-    demo.queue()  # Required for streaming generators
-    demo.launch(css=CSS, theme=gr.themes.Soft())

 Powered by Llama 3 + FAISS + Sentence Transformers
 A Demo Product by Kerdos Infrasoft Private Limited
 Website: https://kerdos.in
+New features in this version:
+  • Model selector dropdown (switch LLM without restart)
+  • Indexing progress indicator (gr.Progress)
+  • MAX_NEW_TOKENS slider exposed in UI
+  • Retrieved sources panel with cosine scores (accordion)
+  • Chat export — download conversation as Markdown
+  • .dockerignore added for security
 """
 import os
+import datetime
+import tempfile
+from pathlib import Path
 from dotenv import load_dotenv
 import gradio as gr
 from rag.document_loader import load_documents
 from rag.embedder import build_index, add_to_index
 from rag.retriever import retrieve
 from rag.chain import answer_stream
+import rag.chain as _chain_module
+load_dotenv()
+# ─────────────────────────────────────────────────────────────────────────────
+# Available models (HF Inference API — free tier)
+# ─────────────────────────────────────────────────────────────────────────────
+AVAILABLE_MODELS = {
+    "Llama 3.1 8B Instruct  ⚡ (default)": "meta-llama/Llama-3.1-8B-Instruct",
+    "Mistral 7B Instruct v0.3": "mistralai/Mistral-7B-Instruct-v0.3",
+    "Mixtral 8×7B Instruct v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "Qwen2.5 72B Instruct": "Qwen/Qwen2.5-72B-Instruct",
+}
+DEFAULT_MODEL_LABEL = list(AVAILABLE_MODELS.keys())[0]
+# ─────────────────────────────────────────────────────────────────────────────
 # State helpers
+# ─────────────────────────────────────────────────────────────────────────────
 def get_hf_token(user_token: str) -> str:
     t = user_token.strip() if user_token else ""
     return t or os.environ.get("HF_TOKEN", "")
+# ─────────────────────────────────────────────────────────────────────────────
 # Gradio handlers
+# ─────────────────────────────────────────────────────────────────────────────
+def process_files(files, current_index, indexed_sources, progress=gr.Progress()):
+    """Parse uploaded files and build / extend the FAISS index with live progress."""
     if not files:
         return current_index, indexed_sources, "⚠️ No files uploaded."
     file_paths = [f.name for f in files] if hasattr(files[0], "name") else files
+    # ── Duplicate guard ──────────────────────────────────────────────────────
     new_paths, skipped = [], []
     for p in file_paths:
         name = Path(p).name
         if name in indexed_sources:
             skipped.append(name)
         return current_index, indexed_sources, (
             f"⚠️ Already indexed: {', '.join(skipped)}. No new documents added."
         )
+    # ── Load ─────────────────────────────────────────────────────────────────
+    progress(0.10, desc="📄 Parsing documents…")
     docs = load_documents(new_paths)
     if not docs:
+        return current_index, indexed_sources, (
+            "❌ Could not extract text. Please upload PDF, DOCX, TXT, MD, or CSV."
+        )
+    # ── Embed & index ─────────────────────────────────────────────────────────
+    progress(0.40, desc="🧠 Embedding chunks…")
     try:
         if current_index is None:
             idx = build_index(docs)
     except Exception as e:
         return current_index, indexed_sources, f"❌ Failed to build index: {e}"
+    progress(1.0, desc="✅ Done!")
     new_sources = {d["source"] for d in docs}
     updated_sources = indexed_sources | new_sources
     total_chunks = idx.index.ntotal
     return idx, updated_sources, msg
+def chat(user_message, history, vector_index, hf_token_input, top_k, model_label, max_tokens):
+    """Streaming chat handler — yields progressively-updated history + sources panel."""
     if not user_message.strip():
+        yield history, "", ""
         return
     hf_token = get_hf_token(hf_token_input)
     if not hf_token:
         history = history + [
             {"role": "user", "content": user_message},
+            {"role": "assistant", "content": "⚠️ Please provide a Hugging Face API token."},
         ]
+        yield history, "", ""
         return
     if vector_index is None:
             {"role": "user", "content": user_message},
             {"role": "assistant", "content": "⚠️ Please upload at least one document first."},
         ]
+        yield history, "", ""
         return
+    # Apply model + token settings from UI for this request
+    selected_model = AVAILABLE_MODELS.get(model_label, _chain_module.LLM_MODEL)
+    _chain_module.LLM_MODEL = selected_model
+    _chain_module.MAX_NEW_TOKENS = int(max_tokens)
     try:
         chunks = retrieve(user_message, vector_index, top_k=int(top_k))
+        # Build sources panel text
+        if chunks:
+            sources_lines = ["**🔍 Retrieved Chunks:**\n"]
+            for i, c in enumerate(chunks, 1):
+                score_bar = "█" * int(c["score"] * 10) + "░" * (10 - int(c["score"] * 10))
+                sources_lines.append(
+                    f"**[{i}] {c['source']}** — score: `{c['score']:.3f}` `{score_bar}`\n"
+                    f"> {c['text'][:220].strip()}{'…' if len(c['text']) > 220 else ''}\n"
+                )
+            sources_md = "\n".join(sources_lines)
+        else:
+            sources_md = "_(No relevant chunks above score threshold)_"
+        # Append placeholder for streaming
         history = history + [
             {"role": "user", "content": user_message},
             {"role": "assistant", "content": ""},
         ]
         for partial in answer_stream(user_message, chunks, hf_token, chat_history=history[:-2]):
             history[-1]["content"] = partial
+            yield history, "", sources_md
+        yield history, "", sources_md
     except Exception as e:
         history[-1]["content"] = f"❌ Error: {e}"
+        yield history, "", ""
+def export_chat(history) -> str | None:
+    """Export the current chat history to a Markdown file for download."""
+    if not history:
+        return None
+    lines = [
+        f"# Kerdos AI — Chat Export",
+        f"_Exported: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}_\n",
+        "---\n",
+    ]
+    for msg in history:
+        role = "👤 **User**" if msg["role"] == "user" else "🤖 **Assistant**"
+        lines.append(f"{role}\n\n{msg['content']}\n\n---\n")
+    tmp = tempfile.NamedTemporaryFile(
+        mode="w", suffix=".md", prefix="kerdos_chat_", delete=False, encoding="utf-8"
+    )
+    tmp.write("\n".join(lines))
+    tmp.close()
+    return tmp.name
 def reset_all():
+    """Clear index, chat, sources panel, and the indexed-sources tracker."""
+    return None, set(), [], "🗑️ Knowledge base and chat cleared.", "", ""
+# ─────────────────────────────────────────────────────────────────────────────
+# CSS
+# ─────────────────────────────────────────────────────────────────────────────
 CSS = """
 /* ── Kerdos Brand Theme ── */
     font-size: 0.82em;
     color: #888;
 }
 #subtitle { text-align: center; color: #6B8CFF; margin-bottom: 8px; }
 .upload-box { border: 2px dashed #0055FF !important; border-radius: 12px !important; }
 #status-box { font-size: 0.9em; }
 footer { display: none !important; }
 """
+# ─────────────────────────────────────────────────────────────────────────────
+# UI
+# ─────────────────────────────────────────────────────────────────────────────
+with gr.Blocks(title="Kerdos AI — Custom LLM Chat | Document Q&A Demo", css=CSS) as demo:
+    # ── Kerdos Header ────────────────────────────────────────────────────────
     gr.HTML("""
     <div id="kerdos-header">
         <div id="kerdos-logo-line">
             &nbsp;|&nbsp;
             📞 <a href="https://kerdos.in/contact" target="_blank" style="color:#00C2FF; text-decoration:none;">Contact Us</a>
         </div>
         <div id="kerdos-demo-banner">
             ⚠️ <strong style="color:#FFA000;">This is a Demo Version.</strong>
             <span style="color:#FFD080;"> Features, model selection, and customisation are limited. The full product will support private, on-premise LLM deployments tailored to your organisation.</span>
         </div>
         <div id="kerdos-fund-banner">
             🚀 <strong style="color:#00C2FF;">We are actively seeking investment &amp; partnerships</strong>
             <span style="color:#A0C8FF;"> to build the <em>fully customisable</em> enterprise edition — including <strong>private LLM hosting</strong>, custom model fine-tuning, data privacy guarantees, and white-label deployments.</span>
         elem_id="subtitle",
     )
+    # ── Shared state ─────────────────────────────────────────────────────────
     vector_index = gr.State(None)
+    indexed_sources = gr.State(set())
     with gr.Row():
+        # ── Left panel: Upload + Settings ────────────────────────────────────
         with gr.Column(scale=1, min_width=300):
             gr.Markdown("### 📂 Upload Documents")
             file_upload = gr.File(
                 type="password",
                 value="",
             )
+            # ── NEW: Model selector ──────────────────────────────────────────
+            model_selector = gr.Dropdown(
+                choices=list(AVAILABLE_MODELS.keys()),
+                value=DEFAULT_MODEL_LABEL,
+                label="🤖 LLM Model",
+                info="Requires appropriate HF token permissions.",
+            )
             top_k_slider = gr.Slider(
                 minimum=1, maximum=10, value=5, step=1,
                 label="Chunks to retrieve (top-K)",
             )
+            # ── NEW: Max tokens slider ───────────────────────────────────────
+            max_tokens_slider = gr.Slider(
+                minimum=128, maximum=4096, value=1024, step=128,
+                label="Max response tokens",
+                info="Higher = longer answers, slower generation.",
+            )
             reset_btn = gr.Button("🗑️ Clear All", variant="stop")
+        # ── Right panel: Chat ─────────────────────────────────────────────────
         with gr.Column(scale=2):
             gr.Markdown("### 💬 Ask Questions")
+            chatbot = gr.Chatbot(height=420, show_label=False, type="messages")
             with gr.Row():
                 user_input = gr.Textbox(
                     placeholder="Ask a question about your documents...",
                 )
                 send_btn = gr.Button("Send ▶", variant="primary", scale=1)
+            with gr.Row():
+                # ── NEW: Export button ────────────────────────────────────────
+                export_btn = gr.Button("💾 Export Chat", variant="secondary", size="sm")
+                export_file = gr.File(label="Download", visible=False, scale=2)
+            # ── NEW: Retrieved sources accordion ──────────────────────────────
+            with gr.Accordion("🔍 Retrieved Sources", open=False):
+                sources_panel = gr.Markdown(
+                    value="_Sources will appear here after each answer._",
+                    label="Sources",
+                )
+    # ── Examples ─────────────────────────────────────────────────────────────
     gr.Examples(
         examples=[
             ["What is the refund policy?"],
             ["Summarize the key points of this document."],
             ["What are the terms of service?"],
             ["Who is the contact person for support?"],
+            ["List all products and their prices."],
         ],
         inputs=user_input,
     )
+    # ── Event wiring ──────────────────────────────────────────────────────────
     index_btn.click(
         fn=process_files,
         inputs=[file_upload, vector_index, indexed_sources],
     send_btn.click(
         fn=chat,
+        inputs=[user_input, chatbot, vector_index, hf_token_input,
+                top_k_slider, model_selector, max_tokens_slider],
+        outputs=[chatbot, user_input, sources_panel],
     )
     user_input.submit(
         fn=chat,
+        inputs=[user_input, chatbot, vector_index, hf_token_input,
+                top_k_slider, model_selector, max_tokens_slider],
+        outputs=[chatbot, user_input, sources_panel],
     )
     reset_btn.click(
         fn=reset_all,
         inputs=[],
+        outputs=[vector_index, indexed_sources, chatbot, status_box, user_input, sources_panel],
+    )
+    export_btn.click(
+        fn=export_chat,
+        inputs=[chatbot],
+        outputs=[export_file],
+    ).then(
+        fn=lambda f: gr.File(value=f, visible=f is not None),
+        inputs=[export_file],
+        outputs=[export_file],
     )
+    # ── Kerdos Footer ─────────────────────────────────────────────────────────
     gr.HTML("""
     <div id="kerdos-footer">
         &copy; 2024–2026 <strong>Kerdos Infrasoft Private Limited</strong> &nbsp;|&nbsp;
     """)
 if __name__ == "__main__":
+    demo.queue()
+    demo.launch(theme=gr.themes.Soft())

rag/document_loader.py CHANGED Viewed

@@ -72,5 +72,36 @@ def _load_docx(path: str) -> str:
 def _load_text(path: str) -> str:
     with open(path, "r", encoding="utf-8", errors="ignore") as f:
         return f.read()

 def _load_text(path: str) -> str:
+    """Load plain text files. CSVs are parsed into natural-language row sentences."""
+    ext = Path(path).suffix.lower()
+    if ext == ".csv":
+        return _load_csv(path)
     with open(path, "r", encoding="utf-8", errors="ignore") as f:
         return f.read()
+def _load_csv(path: str) -> str:
+    """
+    Parse a CSV file into natural-language sentences.
+    Each row becomes:   "ColumnA: value1. ColumnB: value2. ..."
+    This makes tabular data semantically meaningful to the LLM rather
+    than presenting it as raw comma-separated text.
+    """
+    import csv
+    rows: list[str] = []
+    with open(path, "r", encoding="utf-8", errors="ignore", newline="") as f:
+        reader = csv.DictReader(f)
+        if reader.fieldnames is None:
+            # Fallback to raw text for headerless CSVs
+            f.seek(0)
+            return f.read()
+        for row in reader:
+            parts = [f"{col}: {val.strip()}" for col, val in row.items() if val and val.strip()]
+            if parts:
+                rows.append(". ".join(parts) + ".")
+    return "\n".join(rows)