hungnha committed on Mar 13

Commit

4f9286e

1 Parent(s): 225bdac

build server

Browse files

Files changed (17) hide show

.dockerignore +19 -0
.gradio/certificate.pem +31 -0
Dockerfile +28 -0
README.md +90 -6
core/api/__init__.py +0 -0
core/api/server.py +206 -0
core/api/static/index.html +213 -0
core/api/static/style.css +417 -0
core/gradio/user_gradio.py +4 -3
core/preprocessing/docling_processor.py +5 -0
core/rag/embedding_model.py +2 -2
docker-compose.yml +14 -0
evaluation/eval_utils.py +5 -5
evaluation/ragas_eval.py +1 -1
requirements.txt +2 -0
setup.bat +1 -1
setup.sh +1 -1

.dockerignore ADDED Viewed

	@@ -0,0 +1,19 @@

+.git
+.gitignore
+.env
+.gradio
+__pycache__
+*.pyc
+*.pyo
+*.egg-info
+venv/
+.venv/
+data/
+test/
+*.md
+*.bat
+setup.sh
+.dockerignore
+Dockerfile
+docker-compose.yml
+.pixi/

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

Dockerfile ADDED Viewed

	@@ -0,0 +1,28 @@

+# ===== HUST RAG Backend =====
+FROM python:3.11-slim
+WORKDIR /app
+# Do not buffer Python stdout/stderr, so startup messages and logs show up in
+# `docker logs` as they are written
+ENV PYTHONUNBUFFERED=1
+# Install dependencies first (cached layer)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy project code
+COPY core/ core/
+COPY scripts/ scripts/
+COPY evaluation/ evaluation/
+# Create data directory (mount at runtime)
+RUN mkdir -p data
+# Expose API port
+EXPOSE 8000
+# Environment variables (override at runtime)
+ENV GROQ_API_KEY=""
+ENV SILICONFLOW_API_KEY=""
+ENV API_HOST="0.0.0.0"
+ENV API_PORT="8000"
+# Run download_data.py first (checks if data exists, downloads if not), then start FastAPI server.
+# `exec` replaces the shell with the server process, so the server (not `sh`)
+# receives SIGTERM from `docker stop` and can shut down cleanly.
+CMD ["sh", "-c", "python scripts/download_data.py && exec python core/api/server.py"]

README.md CHANGED Viewed

@@ -82,13 +82,8 @@ DoAn/
 │   └── ragas_eval.py              # RAGAS evaluation with multiple metrics
 │
 ├── test/                          # Unit tests
-│   ├── conftest.py                # Shared fixtures and sample data
 │   ├── test_chunk.py              # Chunking logic tests
-│   ├── test_embedding.py          # Embedding model tests
-│   ├── test_vector_store.py       # Vector store tests
-│   ├── test_retrieval.py          # Retrieval pipeline tests
-│   ├── test_generator.py          # Generator/context builder tests
-│   └── ...
 │
 ├── data/                          # Data directory (downloaded from HuggingFace)
 │   ├── data_process/              # Processed markdown files
@@ -158,6 +153,95 @@ python scripts/run_app.py
 Access the chat interface at: **http://127.0.0.1:7860**
 ---
 ## 📖 Usage Guide

 │   └── ragas_eval.py              # RAGAS evaluation with multiple metrics
 │
 ├── test/                          # Unit tests
 │   ├── test_chunk.py              # Chunking logic tests
+│
 │
 ├── data/                          # Data directory (downloaded from HuggingFace)
 │   ├── data_process/              # Processed markdown files
 Access the chat interface at: **http://127.0.0.1:7860**
+### Running with FastAPI (API Mode)
+```bash
+source venv/bin/activate
+python core/api/server.py
+```
+- API server: **http://127.0.0.1:8000**
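+- Chat UI: **http://127.0.0.1:8000/** (to open `core/api/static/index.html` directly from disk instead, first set `API_BASE` in that file to the server URL, e.g. `http://127.0.0.1:8000`; the stylesheet is referenced as `/static/style.css`, so the page is only styled when served by the API server)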
+- API endpoint: `POST /api/chat` with `{"message": "your question"}`
+---
+## 🐳 Docker Deployment
+### Quick Start (Docker Compose)
+```bash
+# 1. Make sure data/ folder exists (download first if needed)
+python scripts/download_data.py
+# 2. Create .env with API keys
+echo "SILICONFLOW_API_KEY=your_key" > .env
+echo "GROQ_API_KEY=your_key" >> .env
+# 3. Build and run
+docker compose up --build -d
+# Access at http://localhost:8000
+```
+### Manual Docker Build & Run
+```bash
+# Build image
+docker build -t hust-rag-api .
+# Run container
+docker run -d \
+  -p 8000:8000 \
+  -v $(pwd)/data:/app/data \
+  --env-file .env \
+  --name hust-rag \
+  hust-rag-api
+```
+### Deploy to AWS (ECR + EC2)
+**Step 1 — Build & push image to ECR:**
+```bash
+# Login to ECR
+aws ecr get-login-password --region ap-southeast-1 | \
+  docker login --username AWS --password-stdin <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com
+# Create repository (first time only)
+aws ecr create-repository --repository-name hust-rag-api --region ap-southeast-1
+# Tag and push
+docker tag hust-rag-api:latest <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com/hust-rag-api:latest
+docker push <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com/hust-rag-api:latest
+```
+**Step 2 — Run on EC2:**
+```bash
+# Pull image
+docker pull <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com/hust-rag-api:latest
+# Upload data to EC2
+scp -r data/ ec2-user@<EC2_IP>:/home/ec2-user/data
+# Run container
+docker run -d \
+  -p 8000:8000 \
+  -v /home/ec2-user/data:/app/data \
+  -e GROQ_API_KEY=your_key \
+  -e SILICONFLOW_API_KEY=your_key \
+  --restart unless-stopped \
+  --name hust-rag \
+  <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com/hust-rag-api:latest
+```
+### Docker Notes
+- The `data/` directory is **mounted as a volume** — not baked into the image
+- API keys are passed via environment variables or `.env` file — never stored in the image
+- To update: rebuild image → push → pull on EC2 → restart container
 ---
 ## 📖 Usage Guide

core/api/__init__.py ADDED Viewed

File without changes

core/api/server.py ADDED Viewed

	@@ -0,0 +1,206 @@

+from __future__ import annotations
+import os
+import sys
+import re
+import json
+import logging
+import time as _time
+from collections import defaultdict
+from pathlib import Path
+from contextlib import asynccontextmanager
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+from fastapi import FastAPI, Request, Depends, HTTPException, Security
+from fastapi.responses import StreamingResponse, JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.security import APIKeyHeader
+from dotenv import find_dotenv, load_dotenv
+from openai import OpenAI
+# Setup path & env
+REPO_ROOT = Path(__file__).resolve().parents[2]
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+load_dotenv(find_dotenv(usecwd=True))
+from core.rag.embedding_model import EmbeddingConfig, QwenEmbeddings
+from core.rag.vector_store import ChromaConfig, ChromaVectorDB
+from core.rag.retrieval import Retriever, RetrievalMode, get_retrieval_config
+from core.rag.generator import RAGContextBuilder, SYSTEM_PROMPT
+# Config
+# Retrieval strategy and parameters; chat() passes RETRIEVAL_MODE.value and
+# RETRIEVAL_CFG.top_k / RETRIEVAL_CFG.initial_k to the RAG context builder.
+RETRIEVAL_MODE = RetrievalMode.HYBRID_RERANK
+RETRIEVAL_CFG = get_retrieval_config()
+# Chat model name; overridable through the LLM_MODEL environment variable.
+LLM_MODEL = os.getenv("LLM_MODEL", "qwen/qwen3-32b")
+# Base URL handed to the OpenAI client in lifespan().
+LLM_API_BASE = "https://api.groq.com/openai/v1"
+# Shared state
+# Filled by lifespan() with the "rag" and "llm" entries and cleared on shutdown.
+_state = {}
+def _filter_think_tags(text: str) -> str:
+    return re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL).strip()
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Initialize RAG resources on startup."""
+    print("⏳ Initializing RAG pipeline...")
+    emb = QwenEmbeddings(EmbeddingConfig())
+    db = ChromaVectorDB(embedder=emb, config=ChromaConfig())
+    retriever = Retriever(vector_db=db)
+    api_key = (os.getenv("GROQ_API_KEY") or "").strip()
+    if not api_key:
+        raise RuntimeError("Missing GROQ_API_KEY")
+    _state["rag"] = RAGContextBuilder(retriever=retriever)
+    _state["llm"] = OpenAI(api_key=api_key, base_url=LLM_API_BASE)
+    print("✅ Ready!")
+    yield
+    _state.clear()
+app = FastAPI(title="HUST RAG API", lifespan=lifespan)
+# ── Security: CORS ──────────────────────────────────────────────
+# Only allow the same-origin frontend or explicitly listed origins.
+# ALLOWED_ORIGINS is read as a comma-separated list; when it is unset or
+# contains no non-blank entry, the fallback below is ["*"] (every origin).
+ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "").split(",")
+ALLOWED_ORIGINS = [o.strip() for o in ALLOWED_ORIGINS if o.strip()] or ["*"]
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=ALLOWED_ORIGINS,
+    allow_methods=["GET", "POST"],
+    allow_headers=["Content-Type", "X-API-Key"],
+)
+# ── Security: API Key Authentication ────────────────────────────
+_api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
+FRONTEND_API_KEY = os.getenv("FRONTEND_API_KEY", "").strip()
+async def verify_api_key(api_key: str = Security(_api_key_header)):
+    """Verify the API key from request header."""
+    if not FRONTEND_API_KEY:
+        # Nếu chưa đặt FRONTEND_API_KEY thì bỏ qua (dev mode)
+        return None
+    if api_key != FRONTEND_API_KEY:
+        raise HTTPException(status_code=403, detail="Invalid or missing API key")
+    return api_key
+# ── Security: Rate Limiting (in-memory) ─────────────────────────
+RATE_LIMIT_WINDOW = 60   # seconds
+RATE_LIMIT_MAX = int(os.getenv("RATE_LIMIT_MAX", "30"))  # max requests per window
+_rate_limit_store: dict[str, list[float]] = defaultdict(list)
+async def rate_limit(request: Request):
+    """Simple per-IP rate limiter."""
+    client_ip = request.client.host if request.client else "unknown"
+    now = _time.time()
+    # Cleanup old entries
+    _rate_limit_store[client_ip] = [
+        t for t in _rate_limit_store[client_ip] if now - t < RATE_LIMIT_WINDOW
+    ]
+    if len(_rate_limit_store[client_ip]) >= RATE_LIMIT_MAX:
+        raise HTTPException(
+            status_code=429,
+            detail=f"Rate limit exceeded. Max {RATE_LIMIT_MAX} requests per minute."
+        )
+    _rate_limit_store[client_ip].append(now)
+    return client_ip
+# Serve static files (CSS, JS, images, etc.)
+STATIC_DIR = Path(__file__).parent / "static"
+from fastapi.staticfiles import StaticFiles
+from fastapi.responses import FileResponse
+app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
+@app.get("/")
+async def index():
+    """Serve the chat UI."""
+    return FileResponse(str(STATIC_DIR / "index.html"))
+@app.post("/api/chat")
+async def chat(
+    request: Request,
+    _key: str = Depends(verify_api_key),
+    _ip: str = Depends(rate_limit),
+):
+    """Chat endpoint with Server-Sent Events streaming."""
+    body = await request.json()
+    question = (body.get("message") or "").strip()
+    if not question:
+        return JSONResponse({"error": "Empty message"}, status_code=400)
+    # Retrieve context
+    import time
+    start_time = time.time()
+    logger.info(f"Start retrieval for question: {question}")
+    prepared = _state["rag"].retrieve_and_prepare(
+        question,
+        k=RETRIEVAL_CFG.top_k,
+        initial_k=RETRIEVAL_CFG.initial_k,
+        mode=RETRIEVAL_MODE.value,
+    )
+    if not prepared["results"]:
+        return JSONResponse({"answer": "Xin lỗi, tôi không tìm thấy thông tin phù hợp."})
+    retrieval_time = time.time() - start_time
+    logger.info(f"Retrieval took {retrieval_time:.2f}s")
+    def stream():
+        llm_start_time = time.time()
+        first_token = True
+        completion = _state["llm"].chat.completions.create(
+            model=LLM_MODEL,
+            messages=[{"role": "user", "content": prepared["prompt"]}],
+            temperature=0.0,
+            max_tokens=4096,
+            stream=True,
+        )
+        for chunk in completion:
+            delta = getattr(chunk.choices[0].delta, "content", "") or ""
+            if delta:
+                if first_token:
+                    ttft = time.time() - llm_start_time
+                    logger.info(f"LLM TTFT (Time To First Token): {ttft:.2f}s")
+                    first_token = False
+                # SSE format
+                yield f"data: {json.dumps({'token': delta}, ensure_ascii=False)}\n\n"
+        total_time = time.time() - start_time
+        logger.info(f"Total request took: {total_time:.2f}s")
+        yield "data: [DONE]\n\n"
+    return StreamingResponse(stream(), media_type="text/event-stream")
+@app.get("/api/config")
+async def config():
+    """Provide frontend config (API key for same-origin frontend).
+
+    NOTE(review): this route declares no auth dependency and returns
+    ``FRONTEND_API_KEY`` verbatim, so any client that can reach the server can
+    read the same key that ``verify_api_key`` checks. The bundled frontend
+    fetches this route on page load, so changing it requires a coordinated
+    frontend change.
+    """
+    return {"api_key": FRONTEND_API_KEY}
+@app.get("/api/health")
+async def health():
+    return {"status": "ok"}
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "core.api.server:app",
+        host=os.getenv("API_HOST", "127.0.0.1"),
+        port=int(os.getenv("API_PORT", "8000")),
+        reload=False,
+    )

core/api/static/index.html ADDED Viewed

	@@ -0,0 +1,213 @@

+<!DOCTYPE html>
+<html lang="vi">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>HUST RAG — Trợ lý Học vụ</title>
+    <meta name="description" content="Hệ thống hỏi đáp quy chế sinh viên Đại học Bách khoa Hà Nội">
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
+    <link rel="stylesheet" href="/static/style.css">
+</head>
+<body>
+    <div id="app">
+        <!-- Header -->
+        <header>
+            <div class="logo">BK</div>
+            <div class="header-text">
+                <h1>HUST RAG Assistant</h1>
+                <p>Trợ lý học vụ Đại học Bách khoa Hà Nội</p>
+            </div>
+            <div class="status-dot" title="Online"></div>
+        </header>
+        <!-- Welcome Screen (shown initially) -->
+        <div id="welcome">
+            <div class="icon">🎓</div>
+            <h2>Xin chào!</h2>
+            <p>Tôi là trợ lý học vụ HUST. Hãy hỏi tôi bất kỳ câu hỏi nào về quy chế, quy định sinh viên.</p>
+            <div class="suggestions">
+                <button onclick="askSuggestion(this)">Điều kiện tốt nghiệp đại học?</button>
+                <button onclick="askSuggestion(this)">Cách tính điểm học kỳ?</button>
+                <button onclick="askSuggestion(this)">Điều kiện đổi ngành?</button>
+                <button onclick="askSuggestion(this)">Đăng ký hoãn thi thế nào?</button>
+            </div>
+        </div>
+        <!-- Chat Messages (hidden initially) -->
+        <div id="messages" style="display: none;"></div>
+        <!-- Input Area -->
+        <div id="input-area">
+            <div class="input-row">
+                <textarea id="input" rows="1" placeholder="Nhập câu hỏi của bạn..."
+                    onkeydown="handleKey(event)" oninput="autoResize(this)"></textarea>
+                <button id="send-btn" onclick="sendMessage()" title="Gửi">
+                    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
+                        <line x1="22" y1="2" x2="11" y2="13"></line>
+                        <polygon points="22 2 15 22 11 13 2 9 22 2"></polygon>
+                    </svg>
+                </button>
+            </div>
+            <div class="hint">Enter để gửi · Shift+Enter để xuống dòng</div>
+        </div>
+    </div>
+    <script>
+        // ====== CONFIGURATION ======
+        // When the HTML file is opened directly (double-click): use the full server URL
+        // When served by the API server (http://127.0.0.1:8000): leave empty ""
+        const API_BASE = "";
+        const messagesEl = document.getElementById('messages');
+        const welcomeEl = document.getElementById('welcome');
+        const inputEl = document.getElementById('input');
+        const sendBtn = document.getElementById('send-btn');
+        // True while a reply is in flight; sendMessage() ignores new submissions meanwhile
+        let isStreaming = false;
+        // Filled by loadConfig() and sent as the X-API-Key header when non-empty
+        let _apiKey = "";
+        // Lấy API key từ server khi trang load
+        (async function loadConfig() {
+            try {
+                const res = await fetch(`${API_BASE}/api/config`);
+                const data = await res.json();
+                _apiKey = data.api_key || "";
+            } catch (e) {
+                console.warn("Could not load API config:", e);
+            }
+        })();
+        function autoResize(el) {
+            el.style.height = 'auto';
+            el.style.height = Math.min(el.scrollHeight, 120) + 'px';
+        }
+        function handleKey(e) {
+            if (e.key === 'Enter' && !e.shiftKey) {
+                e.preventDefault();
+                sendMessage();
+            }
+        }
+        function askSuggestion(btn) {
+            inputEl.value = btn.textContent;
+            sendMessage();
+        }
+        function renderMarkdown(text) {
+            let html = text
+                .replace(/```(\w*)\n([\s\S]*?)```/g, '<pre><code>$2</code></pre>')
+                .replace(/`([^`]+)`/g, '<code>$1</code>')
+                .replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
+                .replace(/\*(.+?)\*/g, '<em>$1</em>')
+                .replace(/^### (.+)$/gm, '<h3>$1</h3>')
+                .replace(/^## (.+)$/gm, '<h2>$1</h2>')
+                .replace(/^# (.+)$/gm, '<h1>$1</h1>')
+                .replace(/^[-*] (.+)$/gm, '<li>$1</li>')
+                .replace(/^\d+\. (.+)$/gm, '<li>$1</li>')
+                .replace(/\n{2,}/g, '</p><p>')
+                .replace(/\n/g, '<br>');
+            html = html.replace(/((?:<li>.*?<\/li>\s*)+)/gs, '<ul>$1</ul>');
+            return `<p>${html}</p>`.replace(/<p><\/p>/g, '');
+        }
+        async function sendMessage() {
+            const text = inputEl.value.trim();
+            if (!text || isStreaming) return;
+            welcomeEl.style.display = 'none';
+            messagesEl.style.display = 'flex';
+            const userMsg = document.createElement('div');
+            userMsg.className = 'msg user';
+            userMsg.textContent = text;
+            messagesEl.appendChild(userMsg);
+            inputEl.value = '';
+            inputEl.style.height = 'auto';
+            scrollToBottom();
+            const botMsg = document.createElement('div');
+            botMsg.className = 'msg bot';
+            botMsg.innerHTML = `
+                <div class="label">Trợ lý HUST</div>
+                <div class="content">
+                    <div class="typing-indicator">
+                        <span></span><span></span><span></span>
+                    </div>
+                </div>`;
+            messagesEl.appendChild(botMsg);
+            scrollToBottom();
+            const contentEl = botMsg.querySelector('.content');
+            isStreaming = true;
+            sendBtn.disabled = true;
+            try {
+                const headers = { 'Content-Type': 'application/json' };
+                if (_apiKey) headers['X-API-Key'] = _apiKey;
+                const res = await fetch(`${API_BASE}/api/chat`, {
+                    method: 'POST',
+                    headers: headers,
+                    body: JSON.stringify({ message: text }),
+                });
+                if (res.headers.get('content-type')?.includes('application/json')) {
+                    const data = await res.json();
+                    contentEl.innerHTML = renderMarkdown(data.answer || data.error || 'Không có phản hồi.');
+                    scrollToBottom();
+                    return;
+                }
+                const reader = res.body.getReader();
+                const decoder = new TextDecoder();
+                let fullText = '';
+                while (true) {
+                    const { done, value } = await reader.read();
+                    if (done) break;
+                    const chunk = decoder.decode(value, { stream: true });
+                    const lines = chunk.split('\n');
+                    for (const line of lines) {
+                        if (line.startsWith('data: ')) {
+                            const payload = line.slice(6).trim();
+                            if (payload === '[DONE]') continue;
+                            try {
+                                const parsed = JSON.parse(payload);
+                                if (parsed.token) {
+                                    fullText += parsed.token;
+                                    contentEl.innerHTML = renderMarkdown(fullText);
+                                    scrollToBottom();
+                                }
+                            } catch {}
+                        }
+                    }
+                }
+                if (fullText) {
+                    contentEl.innerHTML = renderMarkdown(fullText);
+                }
+            } catch (err) {
+                contentEl.innerHTML = '<span style="color: var(--red-accent)">Lỗi kết nối. Vui lòng thử lại.</span>';
+            } finally {
+                isStreaming = false;
+                sendBtn.disabled = false;
+                scrollToBottom();
+                inputEl.focus();
+            }
+        }
+        function scrollToBottom() {
+            requestAnimationFrame(() => {
+                messagesEl.scrollTop = messagesEl.scrollHeight;
+            });
+        }
+        inputEl.focus();
+    </script>
+</body>
+</html>

core/api/static/style.css ADDED Viewed

	@@ -0,0 +1,417 @@

+/* ===== DESIGN TOKENS ===== */
+:root {
+    --bg-primary: #0a0e1a;
+    --bg-secondary: #111827;
+    --bg-card: rgba(17, 24, 39, 0.7);
+    --bg-user-msg: #1d2951;
+    --bg-bot-msg: rgba(30, 41, 59, 0.6);
+    --bg-input: rgba(15, 23, 42, 0.8);
+    --border-color: rgba(99, 102, 241, 0.15);
+    --border-focus: rgba(99, 102, 241, 0.5);
+    --text-primary: #e2e8f0;
+    --text-secondary: #94a3b8;
+    --text-muted: #64748b;
+    --accent: #6366f1;
+    --accent-light: #818cf8;
+    --accent-glow: rgba(99, 102, 241, 0.25);
+    --red-accent: #dc2626;
+    --green-accent: #22c55e;
+    --radius: 12px;
+    --radius-lg: 16px;
+    --radius-pill: 24px;
+    --shadow-sm: 0 1px 3px rgba(0,0,0,0.3);
+    --shadow-md: 0 4px 20px rgba(0,0,0,0.4);
+    --shadow-glow: 0 0 30px var(--accent-glow);
+    --transition: 0.2s cubic-bezier(0.4, 0, 0.2, 1);
+    --font: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
+}
+/* ===== RESET & BASE ===== */
+*, *::before, *::after {
+    margin: 0; padding: 0; box-sizing: border-box;
+}
+html, body {
+    height: 100%;
+    font-family: var(--font);
+    background: var(--bg-primary);
+    color: var(--text-primary);
+    overflow: hidden;
+    -webkit-font-smoothing: antialiased;
+}
+/* ===== ANIMATED BACKGROUND ===== */
+body::before {
+    content: '';
+    position: fixed;
+    inset: 0;
+    background:
+        radial-gradient(ellipse 600px 400px at 20% 20%, rgba(99, 102, 241, 0.08), transparent),
+        radial-gradient(ellipse 500px 500px at 80% 80%, rgba(139, 92, 246, 0.06), transparent),
+        radial-gradient(ellipse 400px 300px at 50% 50%, rgba(59, 130, 246, 0.04), transparent);
+    pointer-events: none;
+    z-index: 0;
+    animation: bgPulse 8s ease-in-out infinite alternate;
+}
+@keyframes bgPulse {
+    0% { opacity: 0.7; }
+    100% { opacity: 1; }
+}
+/* ===== APP LAYOUT ===== */
+#app {
+    display: flex;
+    flex-direction: column;
+    height: 100vh;
+    max-width: 860px;
+    margin: 0 auto;
+    position: relative;
+    z-index: 1;
+}
+/* ===== HEADER ===== */
+header {
+    display: flex;
+    align-items: center;
+    gap: 14px;
+    padding: 16px 24px;
+    backdrop-filter: blur(20px);
+    background: rgba(10, 14, 26, 0.85);
+    border-bottom: 1px solid var(--border-color);
+    flex-shrink: 0;
+}
+.logo {
+    width: 42px; height: 42px;
+    border-radius: var(--radius);
+    background: linear-gradient(135deg, var(--accent), #8b5cf6);
+    display: flex; align-items: center; justify-content: center;
+    font-weight: 700; font-size: 18px; color: #fff;
+    box-shadow: var(--shadow-glow);
+    flex-shrink: 0;
+}
+.header-text h1 {
+    font-size: 17px;
+    font-weight: 600;
+    letter-spacing: -0.3px;
+}
+.header-text p {
+    font-size: 12px;
+    color: var(--text-muted);
+    margin-top: 1px;
+}
+.status-dot {
+    width: 8px; height: 8px;
+    border-radius: 50%;
+    background: var(--green-accent);
+    box-shadow: 0 0 8px rgba(34, 197, 94, 0.5);
+    margin-left: auto;
+    flex-shrink: 0;
+    animation: pulse 2s ease-in-out infinite;
+}
+@keyframes pulse {
+    0%, 100% { opacity: 1; transform: scale(1); }
+    50% { opacity: 0.6; transform: scale(0.85); }
+}
+/* ===== CHAT MESSAGES ===== */
+#messages {
+    flex: 1;
+    overflow-y: auto;
+    padding: 20px 24px;
+    display: flex;
+    flex-direction: column;
+    gap: 6px;
+    scroll-behavior: smooth;
+}
+#messages::-webkit-scrollbar { width: 5px; }
+#messages::-webkit-scrollbar-track { background: transparent; }
+#messages::-webkit-scrollbar-thumb {
+    background: rgba(99, 102, 241, 0.2);
+    border-radius: 10px;
+}
+.msg {
+    max-width: 85%;
+    padding: 12px 16px;
+    border-radius: var(--radius-lg);
+    line-height: 1.65;
+    font-size: 14.5px;
+    animation: msgIn 0.3s ease-out both;
+    word-wrap: break-word;
+}
+@keyframes msgIn {
+    from { opacity: 0; transform: translateY(10px); }
+    to { opacity: 1; transform: translateY(0); }
+}
+.msg.user {
+    align-self: flex-end;
+    background: var(--bg-user-msg);
+    border: 1px solid rgba(99, 102, 241, 0.2);
+    border-bottom-right-radius: 4px;
+    color: #c7d2fe;
+}
+.msg.bot {
+    align-self: flex-start;
+    background: var(--bg-bot-msg);
+    backdrop-filter: blur(10px);
+    border: 1px solid var(--border-color);
+    border-bottom-left-radius: 4px;
+}
+.msg.bot .label {
+    font-size: 11px;
+    font-weight: 600;
+    color: var(--accent-light);
+    margin-bottom: 6px;
+    letter-spacing: 0.3px;
+    text-transform: uppercase;
+}
+/* Markdown rendering inside bot messages */
+.msg.bot h1, .msg.bot h2, .msg.bot h3 {
+    margin: 14px 0 6px;
+    font-weight: 600;
+    color: #e2e8f0;
+}
+.msg.bot h1 { font-size: 16px; }
+.msg.bot h2 { font-size: 15px; }
+.msg.bot h3 { font-size: 14px; }
+.msg.bot p { margin: 4px 0; }
+.msg.bot ul, .msg.bot ol {
+    margin: 6px 0 6px 20px;
+}
+.msg.bot li {
+    margin-bottom: 3px;
+}
+.msg.bot strong {
+    color: #c7d2fe;
+    font-weight: 600;
+}
+.msg.bot code {
+    background: rgba(99, 102, 241, 0.15);
+    padding: 2px 6px;
+    border-radius: 4px;
+    font-size: 13px;
+    font-family: 'SF Mono', 'Fira Code', monospace;
+}
+.msg.bot pre {
+    background: rgba(0, 0, 0, 0.3);
+    padding: 12px;
+    border-radius: 8px;
+    overflow-x: auto;
+    margin: 8px 0;
+}
+.msg.bot pre code {
+    background: none;
+    padding: 0;
+}
+.msg.bot table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 8px 0;
+    font-size: 13px;
+}
+.msg.bot th, .msg.bot td {
+    padding: 6px 10px;
+    border: 1px solid rgba(99, 102, 241, 0.15);
+    text-align: left;
+}
+.msg.bot th {
+    background: rgba(99, 102, 241, 0.1);
+    font-weight: 600;
+    color: #c7d2fe;
+}
+/* Typing indicator */
+.typing-indicator {
+    display: flex;
+    gap: 5px;
+    padding: 4px 0;
+}
+.typing-indicator span {
+    width: 7px; height: 7px;
+    border-radius: 50%;
+    background: var(--accent-light);
+    opacity: 0.4;
+    animation: blink 1.4s ease-in-out infinite;
+}
+.typing-indicator span:nth-child(2) { animation-delay: 0.2s; }
+.typing-indicator span:nth-child(3) { animation-delay: 0.4s; }
+@keyframes blink {
+    0%, 100% { opacity: 0.3; transform: scale(0.85); }
+    50% { opacity: 1; transform: scale(1); }
+}
+/* ===== WELCOME SCREEN ===== */
+#welcome {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    padding: 40px 24px;
+    text-align: center;
+}
+#welcome .icon {
+    width: 72px; height: 72px;
+    border-radius: 20px;
+    background: linear-gradient(135deg, var(--accent), #8b5cf6);
+    display: flex; align-items: center; justify-content: center;
+    font-size: 32px;
+    box-shadow: var(--shadow-glow);
+    margin-bottom: 20px;
+}
+#welcome h2 {
+    font-size: 22px;
+    font-weight: 600;
+    margin-bottom: 8px;
+    letter-spacing: -0.3px;
+}
+#welcome p {
+    font-size: 14px;
+    color: var(--text-secondary);
+    max-width: 380px;
+    line-height: 1.6;
+}
+.suggestions {
+    display: flex;
+    flex-wrap: wrap;
+    gap: 8px;
+    margin-top: 28px;
+    justify-content: center;
+    max-width: 520px;
+}
+.suggestions button {
+    background: var(--bg-bot-msg);
+    backdrop-filter: blur(10px);
+    border: 1px solid var(--border-color);
+    color: var(--text-secondary);
+    font-family: var(--font);
+    font-size: 13px;
+    padding: 9px 16px;
+    border-radius: var(--radius-pill);
+    cursor: pointer;
+    transition: var(--transition);
+    white-space: nowrap;
+}
+.suggestions button:hover {
+    border-color: var(--accent);
+    color: var(--accent-light);
+    background: rgba(99, 102, 241, 0.08);
+    transform: translateY(-1px);
+}
+/* ===== INPUT AREA ===== */
+#input-area {
+    padding: 16px 24px 20px;
+    backdrop-filter: blur(20px);
+    background: rgba(10, 14, 26, 0.85);
+    border-top: 1px solid var(--border-color);
+    flex-shrink: 0;
+}
+.input-row {
+    display: flex;
+    align-items: flex-end;
+    gap: 10px;
+    background: var(--bg-input);
+    border: 1px solid var(--border-color);
+    border-radius: var(--radius-lg);
+    padding: 6px 6px 6px 16px;
+    transition: var(--transition);
+}
+.input-row:focus-within {
+    border-color: var(--border-focus);
+    box-shadow: var(--shadow-glow);
+}
+#input {
+    flex: 1;
+    background: transparent;
+    border: none;
+    outline: none;
+    color: var(--text-primary);
+    font-family: var(--font);
+    font-size: 14.5px;
+    resize: none;
+    max-height: 120px;
+    line-height: 1.5;
+    padding: 8px 0;
+}
+#input::placeholder {
+    color: var(--text-muted);
+}
+#send-btn {
+    width: 40px; height: 40px;
+    border-radius: var(--radius);
+    border: none;
+    background: linear-gradient(135deg, var(--accent), #8b5cf6);
+    color: #fff;
+    cursor: pointer;
+    display: flex; align-items: center; justify-content: center;
+    flex-shrink: 0;
+    transition: var(--transition);
+}
+#send-btn:hover:not(:disabled) {
+    transform: scale(1.05);
+    box-shadow: var(--shadow-glow);
+}
+#send-btn:disabled {
+    opacity: 0.35;
+    cursor: default;
+}
+#send-btn svg {
+    width: 18px; height: 18px;
+}
+.hint {
+    font-size: 11px;
+    text-align: center;
+    color: var(--text-muted);
+    margin-top: 8px;
+}
+/* ===== RESPONSIVE ===== */
+@media (max-width: 640px) {
+    header { padding: 12px 16px; }
+    #messages { padding: 14px 16px; }
+    #input-area { padding: 12px 16px 16px; }
+    .msg { max-width: 92%; font-size: 14px; }
+    #welcome h2 { font-size: 19px; }
+    .suggestions { flex-direction: column; align-items: center; }
+}

core/gradio/user_gradio.py CHANGED Viewed

@@ -17,8 +17,8 @@ if str(REPO_ROOT) not in sys.path:
 @dataclass
 class GradioConfig:
-    server_host: str = "127.0.0.1"
-    server_port: int = 7860
 def _load_env() -> None:
@@ -219,5 +219,6 @@ if __name__ == "__main__":
     print(f"{'='*60}\n")
     demo.launch(
         server_name=GRADIO_CFG.server_host,
-        server_port=GRADIO_CFG.server_port
     )

 @dataclass
 class GradioConfig:
+    server_host: str = os.getenv("GRADIO_HOST", "127.0.0.1")  # default is read from the environment once, when the class body executes
+    server_port: int = int(os.getenv("GRADIO_PORT", "7860"))  # NOTE(review): evaluated before _load_env() (defined below) can be called - confirm GRADIO_* are not expected to come from .env
 def _load_env() -> None:
     print(f"{'='*60}\n")
     demo.launch(
         server_name=GRADIO_CFG.server_host,
+        server_port=GRADIO_CFG.server_port,
+        share=True,
     )

core/preprocessing/docling_processor.py CHANGED Viewed

@@ -107,6 +107,11 @@ class DoclingProcessor:
             return None
     def parse_directory(self, source_dir: str) -> dict:
         self.logger.info(f"Found {len(pdf_files)} PDF files in {source_dir}")
         results = {"total": len(pdf_files), "parsed": 0, "skipped": 0, "errors": 0}

             return None
     def parse_directory(self, source_dir: str) -> dict:
+        source_path = Path(source_dir)
+        if not source_path.exists():
+            raise FileNotFoundError(f"Source directory not found: {source_dir}")
+        pdf_files = sorted(source_path.rglob("*.pdf"))
         self.logger.info(f"Found {len(pdf_files)} PDF files in {source_dir}")
         results = {"total": len(pdf_files), "parsed": 0, "skipped": 0, "errors": 0}

core/rag/embedding_model.py CHANGED Viewed

@@ -14,8 +14,8 @@ logger = logging.getLogger(__name__)
 @dataclass
 class EmbeddingConfig:
     api_base_url: str = "https://api.siliconflow.com/v1"
-    model: str = "Qwen/Qwen3-Embedding-4B"
-    dimension: int = 2048
     batch_size: int = 16

 @dataclass
 class EmbeddingConfig:
     api_base_url: str = "https://api.siliconflow.com/v1"
+    model: str = "Qwen/Qwen3-Embedding-8B"
+    dimension: int = 4096
     batch_size: int = 16

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,14 @@

+services:
+  backend:
+    build: .
+    ports:
+      - "8000:8000"
+    volumes:
+      - ./data:/app/data        # Mount data (ChromaDB + markdown files)
+      - ./.env:/app/.env        # Mount API keys
+    environment:
+      - API_HOST=0.0.0.0
+      - API_PORT=8000
+      - FRONTEND_API_KEY=${FRONTEND_API_KEY}    # API key used to authenticate requests
+      - RATE_LIMIT_MAX=30                        # Max requests per minute per IP
+    restart: unless-stopped

evaluation/eval_utils.py CHANGED Viewed

@@ -48,12 +48,12 @@ def init_rag() -> tuple[RAGGenerator, QwenEmbeddings, OpenAI]:
     retriever = Retriever(vector_db=db)
     rag = RAGGenerator(retriever=retriever)
-    # Initialize LLM client
-    api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
     if not api_key:
-        raise ValueError("Missing SILICONFLOW_API_KEY")
-    llm_client = OpenAI(api_key=api_key, base_url="https://api.siliconflow.com/v1", timeout=60.0)
     return rag, embeddings, llm_client
@@ -61,7 +61,7 @@ def generate_answers(
     rag: RAGGenerator,
     questions: list,
     llm_client: OpenAI,
-    llm_model: str = "nex-agi/DeepSeek-V3.1-Nex-N1",
     retrieval_mode: str = "hybrid_rerank",
     max_workers: int = 8,
 ) -> tuple[list, list]:

     retriever = Retriever(vector_db=db)
     rag = RAGGenerator(retriever=retriever)
+    # Initialize LLM client (same as production: Groq API)
+    api_key = os.getenv("GROQ_API_KEY", "").strip()
     if not api_key:
+        raise ValueError("Missing GROQ_API_KEY")
+    llm_client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1", timeout=60.0)
     return rag, embeddings, llm_client
     rag: RAGGenerator,
     questions: list,
     llm_client: OpenAI,
+    llm_model: str = "qwen/qwen3-32b",
     retrieval_mode: str = "hybrid_rerank",
     max_workers: int = 8,
 ) -> tuple[list, list]:

evaluation/ragas_eval.py CHANGED Viewed

@@ -24,7 +24,7 @@ from evaluation.eval_utils import load_csv_data, init_rag, generate_answers
 # Configuration
 CSV_PATH = "data/data.csv"
 OUTPUT_DIR = "evaluation/results"
-LLM_MODEL = os.getenv("EVAL_LLM_MODEL", "nex-agi/DeepSeek-V3.1-Nex-N1")
 API_BASE = "https://api.siliconflow.com/v1"

 # Configuration
 CSV_PATH = "data/data.csv"
 OUTPUT_DIR = "evaluation/results"
+LLM_MODEL = os.getenv("EVAL_LLM_MODEL", "qwen/qwen3-32b")  # overridable through the EVAL_LLM_MODEL environment variable
 API_BASE = "https://api.siliconflow.com/v1"  # NOTE(review): eval_utils.init_rag in this commit builds its client against api.groq.com - confirm this SiliconFlow URL is still the intended endpoint for LLM_MODEL

requirements.txt CHANGED Viewed

@@ -19,6 +19,8 @@ numpy==2.2.6
 # UI
 gradio==6.2.0
 # Evaluation
 ragas==0.4.2

 # UI
 gradio==6.2.0
+fastapi==0.115.12
+uvicorn==0.34.2
 # Evaluation
 ragas==0.4.2

setup.bat CHANGED Viewed

@@ -31,5 +31,5 @@ if not exist ".env" (
 echo.
 echo Setup complete!
-echo Run: venv\Scripts\activate ^& python scripts\run_app.py
 pause

 echo.
 echo Setup complete!
+echo Run: venv\Scripts\activate ^& python core/api/server.py
 pause

setup.sh CHANGED Viewed

@@ -35,4 +35,4 @@ fi
 echo ""
 echo "Setup complete!"
-echo "Run: source venv/bin/activate && python scripts/run_app.py"

 echo ""
 echo "Setup complete!"
+echo "Run: source venv/bin/activate && python core/api/server.py"