hungnha commited on
Commit
4f9286e
·
1 Parent(s): 225bdac

build server

Browse files
.dockerignore ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .git
2
+ .gitignore
3
+ .env
4
+ .gradio
5
+ __pycache__
6
+ *.pyc
7
+ *.pyo
8
+ *.egg-info
9
+ venv/
10
+ .venv/
11
+ data/
12
+ test/
13
+ *.md
14
+ *.bat
15
+ setup.sh
16
+ .dockerignore
17
+ Dockerfile
18
+ docker-compose.yml
19
+ .pixi/
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ===== HUST RAG Backend =====
FROM python:3.11-slim

# Send print() output straight to `docker logs`; Python block-buffers stdout
# when it is not attached to a TTY, which would delay the startup messages.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Install dependencies first (cached layer)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy project code
COPY core/ core/
COPY scripts/ scripts/
COPY evaluation/ evaluation/

# Create data directory (mount at runtime)
RUN mkdir -p data

# Expose API port
EXPOSE 8000

# Environment variables (override at runtime)
ENV GROQ_API_KEY=""
ENV SILICONFLOW_API_KEY=""
ENV API_HOST="0.0.0.0"
ENV API_PORT="8000"

# Run download_data.py first (checks if data exists, downloads if not), then
# start the FastAPI server. `exec` replaces the shell with the server process
# so it runs as PID 1 and receives SIGTERM from `docker stop`, allowing a
# graceful shutdown instead of a kill after the stop timeout.
CMD ["sh", "-c", "python scripts/download_data.py && exec python core/api/server.py"]
README.md CHANGED
@@ -82,13 +82,8 @@ DoAn/
82
  │ └── ragas_eval.py # RAGAS evaluation with multiple metrics
83
 
84
  ├── test/ # Unit tests
85
- │ ├── conftest.py # Shared fixtures and sample data
86
  │ ├── test_chunk.py # Chunking logic tests
87
- ├── test_embedding.py # Embedding model tests
88
- │ ├── test_vector_store.py # Vector store tests
89
- │ ├── test_retrieval.py # Retrieval pipeline tests
90
- │ ├── test_generator.py # Generator/context builder tests
91
- │ └── ...
92
 
93
  ├── data/ # Data directory (downloaded from HuggingFace)
94
  │ ├── data_process/ # Processed markdown files
@@ -158,6 +153,95 @@ python scripts/run_app.py
158
 
159
  Access the chat interface at: **http://127.0.0.1:7860**
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  ---
162
 
163
  ## 📖 Usage Guide
 
82
  │ └── ragas_eval.py # RAGAS evaluation with multiple metrics
83
 
84
  ├── test/ # Unit tests
 
85
  │ ├── test_chunk.py # Chunking logic tests
86
+
 
 
 
 
87
 
88
  ├── data/ # Data directory (downloaded from HuggingFace)
89
  │ ├── data_process/ # Processed markdown files
 
153
 
154
  Access the chat interface at: **http://127.0.0.1:7860**
155
 
156
+ ### Running with FastAPI (API Mode)
157
+
158
+ ```bash
159
+ source venv/bin/activate
160
+ python core/api/server.py
161
+ ```
162
+
163
+ - API server: **http://127.0.0.1:8000**
164
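+ - Chat UI: **http://127.0.0.1:8000/** (recommended). You can also open `core/api/static/index.html` directly, but then set `API_BASE` in its `<script>` to the server URL (e.g. `http://127.0.0.1:8000`); note that the stylesheet is referenced as `/static/style.css` and will not load from a `file://` page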
165
+ - API endpoint: `POST /api/chat` with `{"message": "your question"}`
166
+
167
+ ---
168
+
169
+ ## 🐳 Docker Deployment
170
+
171
+ ### Quick Start (Docker Compose)
172
+
173
+ ```bash
174
+ # 1. Make sure data/ folder exists (download first if needed)
175
+ python scripts/download_data.py
176
+
177
+ # 2. Create .env with API keys
178
+ echo "SILICONFLOW_API_KEY=your_key" > .env
179
+ echo "GROQ_API_KEY=your_key" >> .env
180
+
181
+ # 3. Build and run
182
+ docker compose up --build -d
183
+
184
+ # Access at http://localhost:8000
185
+ ```
186
+
187
+ ### Manual Docker Build & Run
188
+
189
+ ```bash
190
+ # Build image
191
+ docker build -t hust-rag-api .
192
+
193
+ # Run container
194
+ docker run -d \
195
+ -p 8000:8000 \
196
+ -v $(pwd)/data:/app/data \
197
+ --env-file .env \
198
+ --name hust-rag \
199
+ hust-rag-api
200
+ ```
201
+
202
+ ### Deploy to AWS (ECR + EC2)
203
+
204
+ **Step 1 — Build & push image to ECR:**
205
+
206
+ ```bash
207
+ # Login to ECR
208
+ aws ecr get-login-password --region ap-southeast-1 | \
209
+ docker login --username AWS --password-stdin <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com
210
+
211
+ # Create repository (first time only)
212
+ aws ecr create-repository --repository-name hust-rag-api
213
+
214
+ # Tag and push
215
+ docker tag hust-rag-api:latest <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com/hust-rag-api:latest
216
+ docker push <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com/hust-rag-api:latest
217
+ ```
218
+
219
+ **Step 2 — Run on EC2:**
220
+
221
+ ```bash
222
+ # Pull image
223
+ docker pull <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com/hust-rag-api:latest
224
+
225
+ # Upload data to EC2
226
+ scp -r data/ ec2-user@<EC2_IP>:/home/ec2-user/data
227
+
228
+ # Run container
229
+ docker run -d \
230
+ -p 8000:8000 \
231
+ -v /home/ec2-user/data:/app/data \
232
+ -e GROQ_API_KEY=your_key \
233
+ -e SILICONFLOW_API_KEY=your_key \
234
+ --restart unless-stopped \
235
+ --name hust-rag \
236
+ <ACCOUNT_ID>.dkr.ecr.ap-southeast-1.amazonaws.com/hust-rag-api:latest
237
+ ```
238
+
239
+ ### Docker Notes
240
+
241
+ - The `data/` directory is **mounted as a volume** — not baked into the image
242
+ - API keys are passed via environment variables or `.env` file — never stored in the image
243
+ - To update: rebuild image → push → pull on EC2 → restart container
244
+
245
  ---
246
 
247
  ## 📖 Usage Guide
core/api/__init__.py ADDED
File without changes
core/api/server.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import os
3
+ import sys
4
+ import re
5
+ import json
6
+ import logging
7
+ import time as _time
8
+ from collections import defaultdict
9
+ from pathlib import Path
10
+ from contextlib import asynccontextmanager
11
+
12
+ logging.basicConfig(level=logging.INFO)
13
+ logger = logging.getLogger(__name__)
14
+
15
+ from fastapi import FastAPI, Request, Depends, HTTPException, Security
16
+ from fastapi.responses import StreamingResponse, JSONResponse
17
+ from fastapi.middleware.cors import CORSMiddleware
18
+ from fastapi.security import APIKeyHeader
19
+ from dotenv import find_dotenv, load_dotenv
20
+ from openai import OpenAI
21
+
22
+ # Setup path & env
23
+ REPO_ROOT = Path(__file__).resolve().parents[2]
24
+ if str(REPO_ROOT) not in sys.path:
25
+ sys.path.insert(0, str(REPO_ROOT))
26
+
27
+ load_dotenv(find_dotenv(usecwd=True))
28
+
29
+ from core.rag.embedding_model import EmbeddingConfig, QwenEmbeddings
30
+ from core.rag.vector_store import ChromaConfig, ChromaVectorDB
31
+ from core.rag.retrieval import Retriever, RetrievalMode, get_retrieval_config
32
+ from core.rag.generator import RAGContextBuilder, SYSTEM_PROMPT
33
+
34
+ # Config
35
+ RETRIEVAL_MODE = RetrievalMode.HYBRID_RERANK
36
+ RETRIEVAL_CFG = get_retrieval_config()
37
+ LLM_MODEL = os.getenv("LLM_MODEL", "qwen/qwen3-32b")
38
+ LLM_API_BASE = "https://api.groq.com/openai/v1"
39
+
40
+ # Shared state
41
+ _state = {}
42
+
43
+
44
+ def _filter_think_tags(text: str) -> str:
45
+ return re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL).strip()
46
+
47
+
48
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Initialize RAG resources on startup and release them on shutdown.

    Populates the module-level ``_state`` dict with the ``"rag"`` context
    builder and the ``"llm"`` client, and clears it when the app stops.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is unset or blank.
    """
    # Check configuration before constructing the pipeline objects so that a
    # misconfigured deployment fails immediately with a clear message.
    api_key = (os.getenv("GROQ_API_KEY") or "").strip()
    if not api_key:
        raise RuntimeError("Missing GROQ_API_KEY")

    print("⏳ Initializing RAG pipeline...")
    emb = QwenEmbeddings(EmbeddingConfig())
    db = ChromaVectorDB(embedder=emb, config=ChromaConfig())
    retriever = Retriever(vector_db=db)

    _state["rag"] = RAGContextBuilder(retriever=retriever)
    _state["llm"] = OpenAI(api_key=api_key, base_url=LLM_API_BASE)
    print("✅ Ready!")
    try:
        yield
    finally:
        # Runs even if an exception is thrown into the generator at `yield`.
        _state.clear()
65
+
66
+
67
+ app = FastAPI(title="HUST RAG API", lifespan=lifespan)
68
+
69
+ # ── Security: CORS ──────────────────────────────────────────────
70
+ # Chỉ cho phép frontend cùng origin hoặc origins cụ thể
71
+ ALLOWED_ORIGINS = os.getenv("ALLOWED_ORIGINS", "").split(",")
72
+ ALLOWED_ORIGINS = [o.strip() for o in ALLOWED_ORIGINS if o.strip()] or ["*"]
73
+
74
+ app.add_middleware(
75
+ CORSMiddleware,
76
+ allow_origins=ALLOWED_ORIGINS,
77
+ allow_methods=["GET", "POST"],
78
+ allow_headers=["Content-Type", "X-API-Key"],
79
+ )
80
+
81
+ # ── Security: API Key Authentication ────────────────────────────
82
+ _api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
83
+ FRONTEND_API_KEY = os.getenv("FRONTEND_API_KEY", "").strip()
84
+
85
+
86
async def verify_api_key(api_key: str = Security(_api_key_header)):
    """Verify the API key from the ``X-API-Key`` request header.

    Returns:
        ``None`` when no ``FRONTEND_API_KEY`` is configured (dev mode, check
        skipped); otherwise the validated key.

    Raises:
        HTTPException: 403 when a key is configured and the header is missing
            or does not match.

    NOTE(review): ``GET /api/config`` in this file returns ``FRONTEND_API_KEY``
    to any caller without authentication, so this check only stops clients
    that do not fetch that endpoint first. Confirm that this is intended.
    """
    import secrets

    if not FRONTEND_API_KEY:
        # FRONTEND_API_KEY is not set: skip the check (dev mode).
        return None
    # The header is optional (auto_error=False), so api_key may be None.
    # Compare as bytes in constant time: compare_digest rejects non-ASCII str
    # and a plain `!=` leaks how many leading characters matched.
    supplied = (api_key or "").encode("utf-8")
    expected = FRONTEND_API_KEY.encode("utf-8")
    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=403, detail="Invalid or missing API key")
    return api_key
94
+
95
+
96
+ # ── Security: Rate Limiting (in-memory) ─────────────────────────
97
+ RATE_LIMIT_WINDOW = 60 # seconds
98
+ RATE_LIMIT_MAX = int(os.getenv("RATE_LIMIT_MAX", "30")) # max requests per window
99
+ _rate_limit_store: dict[str, list[float]] = defaultdict(list)
100
+
101
+
102
async def rate_limit(request: Request):
    """Simple per-IP sliding-window rate limiter (in-memory, per process).

    Allows at most ``RATE_LIMIT_MAX`` requests per ``RATE_LIMIT_WINDOW``
    seconds for each client address.

    Returns:
        The client address used as the rate-limit key.

    Raises:
        HTTPException: 429 with a ``Retry-After`` header when the limit is hit.

    NOTE(review): the key is ``request.client.host``; behind a reverse proxy
    this is presumably the proxy address, so all users would share one bucket.
    Confirm against the deployment.
    """
    client_ip = request.client.host if request.client else "unknown"
    # Monotonic clock: interval arithmetic must not be affected by wall-clock
    # adjustments. Timestamps are only ever compared with each other here.
    now = _time.monotonic()
    cutoff = now - RATE_LIMIT_WINDOW

    # Once per window, evict clients whose newest hit has expired. Without
    # this, an address that never returns keeps its entry forever and the
    # store grows without bound.
    if now >= getattr(rate_limit, "_next_sweep", float("-inf")):
        stale = [
            ip for ip, hits in _rate_limit_store.items()
            if not hits or hits[-1] <= cutoff
        ]
        for ip in stale:
            del _rate_limit_store[ip]
        rate_limit._next_sweep = now + RATE_LIMIT_WINDOW

    # Keep only this client's hits that are still inside the window.
    recent = [t for t in _rate_limit_store[client_ip] if t > cutoff]
    _rate_limit_store[client_ip] = recent
    if len(recent) >= RATE_LIMIT_MAX:
        # The oldest remaining hit decides when a slot frees up again.
        oldest = recent[0] if recent else now
        retry_after = max(1, int(oldest + RATE_LIMIT_WINDOW - now) + 1)
        raise HTTPException(
            status_code=429,
            detail=f"Rate limit exceeded. Max {RATE_LIMIT_MAX} requests per minute.",
            headers={"Retry-After": str(retry_after)},
        )
    recent.append(now)
    return client_ip
117
+
118
+ # Serve static files (CSS, JS, images, etc.)
119
+ STATIC_DIR = Path(__file__).parent / "static"
120
+ from fastapi.staticfiles import StaticFiles
121
+ from fastapi.responses import FileResponse
122
+
123
+ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
124
+
125
+
126
+ @app.get("/")
127
+ async def index():
128
+ """Serve the chat UI."""
129
+ return FileResponse(str(STATIC_DIR / "index.html"))
130
+
131
+
132
@app.post("/api/chat")
async def chat(
    request: Request,
    _key: str = Depends(verify_api_key),
    _ip: str = Depends(rate_limit),
):
    """Chat endpoint with Server-Sent Events streaming.

    Expects a JSON object body with a string ``"message"`` field.

    Returns:
        - 400 JSON ``{"error": ...}`` when the body is not valid JSON or the
          message is missing, empty or not a string.
        - JSON ``{"answer": ...}`` when retrieval yields no results.
        - Otherwise a ``text/event-stream`` response whose events are
          ``data: {"token": ...}`` followed by a final ``data: [DONE]``.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    try:
        body = await request.json()
    except ValueError:
        # Malformed JSON (or undecodable bytes) is a client error, not a 500.
        return JSONResponse({"error": "Invalid JSON body"}, status_code=400)

    # Tolerate non-object bodies and non-string messages instead of raising.
    message = body.get("message") if isinstance(body, dict) else None
    question = message.strip() if isinstance(message, str) else ""

    if not question:
        return JSONResponse({"error": "Empty message"}, status_code=400)

    # Retrieve context
    start_time = _time.time()
    logger.info("Start retrieval for question: %s", question)
    # retrieve_and_prepare is a synchronous call; run it in the threadpool so
    # it does not block the event loop (and other requests) while it works.
    prepared = await run_in_threadpool(
        _state["rag"].retrieve_and_prepare,
        question,
        k=RETRIEVAL_CFG.top_k,
        initial_k=RETRIEVAL_CFG.initial_k,
        mode=RETRIEVAL_MODE.value,
    )

    if not prepared["results"]:
        return JSONResponse({"answer": "Xin lỗi, tôi không tìm thấy thông tin phù hợp."})

    retrieval_time = _time.time() - start_time
    logger.info("Retrieval took %.2fs", retrieval_time)

    def stream():
        """Yield SSE events for each content delta of the LLM completion."""
        llm_start_time = _time.time()
        first_token = True
        completion = _state["llm"].chat.completions.create(
            model=LLM_MODEL,
            messages=[{"role": "user", "content": prepared["prompt"]}],
            temperature=0.0,
            max_tokens=4096,
            stream=True,
        )
        for chunk in completion:
            # Guard against chunks that carry no choices, which would make
            # `choices[0]` raise IndexError mid-stream.
            if not chunk.choices:
                continue
            delta = getattr(chunk.choices[0].delta, "content", "") or ""
            if delta:
                if first_token:
                    ttft = _time.time() - llm_start_time
                    logger.info("LLM TTFT (Time To First Token): %.2fs", ttft)
                    first_token = False
                # SSE format
                yield f"data: {json.dumps({'token': delta}, ensure_ascii=False)}\n\n"
        total_time = _time.time() - start_time
        logger.info("Total request took: %.2fs", total_time)
        yield "data: [DONE]\n\n"

    return StreamingResponse(stream(), media_type="text/event-stream")
186
+
187
+
188
+ @app.get("/api/config")
189
+ async def config():
190
+ """Provide frontend config (API key for same-origin frontend)."""
191
+ return {"api_key": FRONTEND_API_KEY}
192
+
193
+
194
+ @app.get("/api/health")
195
+ async def health():
196
+ return {"status": "ok"}
197
+
198
+
199
+ if __name__ == "__main__":
200
+ import uvicorn
201
+ uvicorn.run(
202
+ "core.api.server:app",
203
+ host=os.getenv("API_HOST", "127.0.0.1"),
204
+ port=int(os.getenv("API_PORT", "8000")),
205
+ reload=False,
206
+ )
core/api/static/index.html ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="vi">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>HUST RAG — Trợ lý Học vụ</title>
7
+ <meta name="description" content="Hệ thống hỏi đáp quy chế sinh viên Đại học Bách khoa Hà Nội">
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
10
+ <link rel="stylesheet" href="/static/style.css">
11
+ </head>
12
+ <body>
13
+ <div id="app">
14
+ <!-- Header -->
15
+ <header>
16
+ <div class="logo">BK</div>
17
+ <div class="header-text">
18
+ <h1>HUST RAG Assistant</h1>
19
+ <p>Trợ lý học vụ Đại học Bách khoa Hà Nội</p>
20
+ </div>
21
+ <div class="status-dot" title="Online"></div>
22
+ </header>
23
+
24
+ <!-- Welcome Screen (shown initially) -->
25
+ <div id="welcome">
26
+ <div class="icon">🎓</div>
27
+ <h2>Xin chào!</h2>
28
+ <p>Tôi là trợ lý học vụ HUST. Hãy hỏi tôi bất kỳ câu hỏi nào về quy chế, quy định sinh viên.</p>
29
+ <div class="suggestions">
30
+ <button onclick="askSuggestion(this)">Điều kiện tốt nghiệp đại học?</button>
31
+ <button onclick="askSuggestion(this)">Cách tính điểm học kỳ?</button>
32
+ <button onclick="askSuggestion(this)">Điều kiện đổi ngành?</button>
33
+ <button onclick="askSuggestion(this)">Đăng ký hoãn thi thế nào?</button>
34
+ </div>
35
+ </div>
36
+
37
+ <!-- Chat Messages (hidden initially) -->
38
+ <div id="messages" style="display: none;"></div>
39
+
40
+ <!-- Input Area -->
41
+ <div id="input-area">
42
+ <div class="input-row">
43
+ <textarea id="input" rows="1" placeholder="Nhập câu hỏi của bạn..."
44
+ onkeydown="handleKey(event)" oninput="autoResize(this)"></textarea>
45
+ <button id="send-btn" onclick="sendMessage()" title="Gửi">
46
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round">
47
+ <line x1="22" y1="2" x2="11" y2="13"></line>
48
+ <polygon points="22 2 15 22 11 13 2 9 22 2"></polygon>
49
+ </svg>
50
+ </button>
51
+ </div>
52
+ <div class="hint">Enter để gửi · Shift+Enter để xuống dòng</div>
53
+ </div>
54
+ </div>
55
+
56
+ <script>
57
+ // ====== CẤU HÌNH ======
58
+ // Nếu mở HTML trực tiếp (double-click file): dùng URL đầy đủ
59
+ // Nếu mở qua server (http://127.0.0.1:8000): để rỗng ""
60
+ const API_BASE = "";
61
+
62
+ const messagesEl = document.getElementById('messages');
63
+ const welcomeEl = document.getElementById('welcome');
64
+ const inputEl = document.getElementById('input');
65
+ const sendBtn = document.getElementById('send-btn');
66
+ let isStreaming = false;
67
+ let _apiKey = "";
68
+
69
+ // Lấy API key từ server khi trang load
70
+ (async function loadConfig() {
71
+ try {
72
+ const res = await fetch(`${API_BASE}/api/config`);
73
+ const data = await res.json();
74
+ _apiKey = data.api_key || "";
75
+ } catch (e) {
76
+ console.warn("Could not load API config:", e);
77
+ }
78
+ })();
79
+
80
+ function autoResize(el) {
81
+ el.style.height = 'auto';
82
+ el.style.height = Math.min(el.scrollHeight, 120) + 'px';
83
+ }
84
+
85
+ function handleKey(e) {
86
+ if (e.key === 'Enter' && !e.shiftKey) {
87
+ e.preventDefault();
88
+ sendMessage();
89
+ }
90
+ }
91
+
92
+ function askSuggestion(btn) {
93
+ inputEl.value = btn.textContent;
94
+ sendMessage();
95
+ }
96
+
97
/**
 * Convert a small Markdown subset (fenced/inline code, bold, italic,
 * headings, list items, paragraphs, line breaks) into an HTML string.
 *
 * The input is HTML-escaped BEFORE any Markdown rule runs, because the result
 * is assigned to `innerHTML` and the text comes from the model/server: without
 * escaping, markup such as `<img onerror=...>` in a reply would be executed.
 *
 * @param {string} text Raw Markdown text.
 * @returns {string} HTML wrapped in <p>…</p>.
 */
function renderMarkdown(text) {
    // `&` must be escaped first so the entities added below are not re-escaped.
    const escaped = text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    let html = escaped
        .replace(/```(\w*)\n([\s\S]*?)```/g, '<pre><code>$2</code></pre>')
        .replace(/`([^`]+)`/g, '<code>$1</code>')
        .replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
        .replace(/\*(.+?)\*/g, '<em>$1</em>')
        .replace(/^### (.+)$/gm, '<h3>$1</h3>')
        .replace(/^## (.+)$/gm, '<h2>$1</h2>')
        .replace(/^# (.+)$/gm, '<h1>$1</h1>')
        .replace(/^[-*] (.+)$/gm, '<li>$1</li>')
        .replace(/^\d+\. (.+)$/gm, '<li>$1</li>')
        .replace(/\n{2,}/g, '</p><p>')
        .replace(/\n/g, '<br>');

    // Wrap each run of adjacent <li> elements in a list container.
    html = html.replace(/((?:<li>.*?<\/li>\s*)+)/gs, '<ul>$1</ul>');
    return `<p>${html}</p>`.replace(/<p><\/p>/g, '');
}
114
+
115
/**
 * Send the current input as a chat message and render the reply.
 *
 * Appends the user bubble, shows a typing indicator, POSTs to `/api/chat`,
 * then either renders a JSON reply (`answer`, `error`, or FastAPI's `detail`)
 * or consumes the Server-Sent Events stream token by token.
 */
async function sendMessage() {
    const text = inputEl.value.trim();
    if (!text || isStreaming) return;

    welcomeEl.style.display = 'none';
    messagesEl.style.display = 'flex';

    const userMsg = document.createElement('div');
    userMsg.className = 'msg user';
    userMsg.textContent = text;
    messagesEl.appendChild(userMsg);

    inputEl.value = '';
    inputEl.style.height = 'auto';
    scrollToBottom();

    const botMsg = document.createElement('div');
    botMsg.className = 'msg bot';
    botMsg.innerHTML = `
        <div class="label">Trợ lý HUST</div>
        <div class="content">
            <div class="typing-indicator">
                <span></span><span></span><span></span>
            </div>
        </div>`;
    messagesEl.appendChild(botMsg);
    scrollToBottom();

    const contentEl = botMsg.querySelector('.content');
    isStreaming = true;
    sendBtn.disabled = true;

    try {
        const headers = { 'Content-Type': 'application/json' };
        if (_apiKey) headers['X-API-Key'] = _apiKey;

        const res = await fetch(`${API_BASE}/api/chat`, {
            method: 'POST',
            headers: headers,
            body: JSON.stringify({ message: text }),
        });

        if (res.headers.get('content-type')?.includes('application/json')) {
            const data = await res.json();
            // 403/429 responses use `detail` rather than `answer`/`error`.
            const detail = typeof data.detail === 'string' ? data.detail : '';
            contentEl.innerHTML = renderMarkdown(data.answer || data.error || detail || 'Không có phản hồi.');
            scrollToBottom();
            return;
        }

        // A non-JSON failure (e.g. a plain-text 500) has no stream to read;
        // surface it through the catch block instead of waiting forever.
        if (!res.ok || !res.body) {
            throw new Error(`HTTP ${res.status}`);
        }

        const reader = res.body.getReader();
        const decoder = new TextDecoder();
        let fullText = '';
        // Text received so far that does not yet end in a newline. Network
        // chunks do not align with SSE lines, so a `data:` line can arrive in
        // two pieces; parsing each chunk on its own would drop those tokens.
        let pending = '';

        const consumeLine = (line) => {
            if (!line.startsWith('data: ')) return;
            const payload = line.slice(6).trim();
            if (payload === '[DONE]') return;
            let parsed;
            try {
                parsed = JSON.parse(payload);
            } catch (e) {
                console.warn('Skipping malformed SSE payload:', payload);
                return;
            }
            if (parsed.token) {
                fullText += parsed.token;
                contentEl.innerHTML = renderMarkdown(fullText);
                scrollToBottom();
            }
        };

        while (true) {
            const { done, value } = await reader.read();
            if (done) break;

            pending += decoder.decode(value, { stream: true });
            const lines = pending.split('\n');
            // The last element is an incomplete line (or '' after a newline).
            pending = lines.pop();
            lines.forEach(consumeLine);
        }

        // Flush any bytes buffered in the decoder and a final unterminated line.
        pending += decoder.decode();
        if (pending) consumeLine(pending);

        // Replace the typing indicator even when the stream carried no tokens.
        contentEl.innerHTML = renderMarkdown(fullText || 'Không có phản hồi.');
    } catch (err) {
        contentEl.innerHTML = '<span style="color: var(--red-accent)">Lỗi kết nối. Vui lòng thử lại.</span>';
    } finally {
        isStreaming = false;
        sendBtn.disabled = false;
        scrollToBottom();
        inputEl.focus();
    }
}
203
+
204
+ function scrollToBottom() {
205
+ requestAnimationFrame(() => {
206
+ messagesEl.scrollTop = messagesEl.scrollHeight;
207
+ });
208
+ }
209
+
210
+ inputEl.focus();
211
+ </script>
212
+ </body>
213
+ </html>
core/api/static/style.css ADDED
@@ -0,0 +1,417 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* ===== DESIGN TOKENS ===== */
2
+ :root {
3
+ --bg-primary: #0a0e1a;
4
+ --bg-secondary: #111827;
5
+ --bg-card: rgba(17, 24, 39, 0.7);
6
+ --bg-user-msg: #1d2951;
7
+ --bg-bot-msg: rgba(30, 41, 59, 0.6);
8
+ --bg-input: rgba(15, 23, 42, 0.8);
9
+ --border-color: rgba(99, 102, 241, 0.15);
10
+ --border-focus: rgba(99, 102, 241, 0.5);
11
+ --text-primary: #e2e8f0;
12
+ --text-secondary: #94a3b8;
13
+ --text-muted: #64748b;
14
+ --accent: #6366f1;
15
+ --accent-light: #818cf8;
16
+ --accent-glow: rgba(99, 102, 241, 0.25);
17
+ --red-accent: #dc2626;
18
+ --green-accent: #22c55e;
19
+ --radius: 12px;
20
+ --radius-lg: 16px;
21
+ --radius-pill: 24px;
22
+ --shadow-sm: 0 1px 3px rgba(0,0,0,0.3);
23
+ --shadow-md: 0 4px 20px rgba(0,0,0,0.4);
24
+ --shadow-glow: 0 0 30px var(--accent-glow);
25
+ --transition: 0.2s cubic-bezier(0.4, 0, 0.2, 1);
26
+ --font: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif;
27
+ }
28
+
29
+ /* ===== RESET & BASE ===== */
30
+ *, *::before, *::after {
31
+ margin: 0; padding: 0; box-sizing: border-box;
32
+ }
33
+
34
+ html, body {
35
+ height: 100%;
36
+ font-family: var(--font);
37
+ background: var(--bg-primary);
38
+ color: var(--text-primary);
39
+ overflow: hidden;
40
+ -webkit-font-smoothing: antialiased;
41
+ }
42
+
43
+ /* ===== ANIMATED BACKGROUND ===== */
44
+ body::before {
45
+ content: '';
46
+ position: fixed;
47
+ inset: 0;
48
+ background:
49
+ radial-gradient(ellipse 600px 400px at 20% 20%, rgba(99, 102, 241, 0.08), transparent),
50
+ radial-gradient(ellipse 500px 500px at 80% 80%, rgba(139, 92, 246, 0.06), transparent),
51
+ radial-gradient(ellipse 400px 300px at 50% 50%, rgba(59, 130, 246, 0.04), transparent);
52
+ pointer-events: none;
53
+ z-index: 0;
54
+ animation: bgPulse 8s ease-in-out infinite alternate;
55
+ }
56
+
57
+ @keyframes bgPulse {
58
+ 0% { opacity: 0.7; }
59
+ 100% { opacity: 1; }
60
+ }
61
+
62
+ /* ===== APP LAYOUT ===== */
63
+ #app {
64
+ display: flex;
65
+ flex-direction: column;
66
+ height: 100vh;
67
+ max-width: 860px;
68
+ margin: 0 auto;
69
+ position: relative;
70
+ z-index: 1;
71
+ }
72
+
73
+ /* ===== HEADER ===== */
74
+ header {
75
+ display: flex;
76
+ align-items: center;
77
+ gap: 14px;
78
+ padding: 16px 24px;
79
+ backdrop-filter: blur(20px);
80
+ background: rgba(10, 14, 26, 0.85);
81
+ border-bottom: 1px solid var(--border-color);
82
+ flex-shrink: 0;
83
+ }
84
+
85
+ .logo {
86
+ width: 42px; height: 42px;
87
+ border-radius: var(--radius);
88
+ background: linear-gradient(135deg, var(--accent), #8b5cf6);
89
+ display: flex; align-items: center; justify-content: center;
90
+ font-weight: 700; font-size: 18px; color: #fff;
91
+ box-shadow: var(--shadow-glow);
92
+ flex-shrink: 0;
93
+ }
94
+
95
+ .header-text h1 {
96
+ font-size: 17px;
97
+ font-weight: 600;
98
+ letter-spacing: -0.3px;
99
+ }
100
+
101
+ .header-text p {
102
+ font-size: 12px;
103
+ color: var(--text-muted);
104
+ margin-top: 1px;
105
+ }
106
+
107
+ .status-dot {
108
+ width: 8px; height: 8px;
109
+ border-radius: 50%;
110
+ background: var(--green-accent);
111
+ box-shadow: 0 0 8px rgba(34, 197, 94, 0.5);
112
+ margin-left: auto;
113
+ flex-shrink: 0;
114
+ animation: pulse 2s ease-in-out infinite;
115
+ }
116
+
117
+ @keyframes pulse {
118
+ 0%, 100% { opacity: 1; transform: scale(1); }
119
+ 50% { opacity: 0.6; transform: scale(0.85); }
120
+ }
121
+
122
+ /* ===== CHAT MESSAGES ===== */
123
+ #messages {
124
+ flex: 1;
125
+ overflow-y: auto;
126
+ padding: 20px 24px;
127
+ display: flex;
128
+ flex-direction: column;
129
+ gap: 6px;
130
+ scroll-behavior: smooth;
131
+ }
132
+
133
+ #messages::-webkit-scrollbar { width: 5px; }
134
+ #messages::-webkit-scrollbar-track { background: transparent; }
135
+ #messages::-webkit-scrollbar-thumb {
136
+ background: rgba(99, 102, 241, 0.2);
137
+ border-radius: 10px;
138
+ }
139
+
140
+ .msg {
141
+ max-width: 85%;
142
+ padding: 12px 16px;
143
+ border-radius: var(--radius-lg);
144
+ line-height: 1.65;
145
+ font-size: 14.5px;
146
+ animation: msgIn 0.3s ease-out both;
147
+ word-wrap: break-word;
148
+ }
149
+
150
+ @keyframes msgIn {
151
+ from { opacity: 0; transform: translateY(10px); }
152
+ to { opacity: 1; transform: translateY(0); }
153
+ }
154
+
155
+ .msg.user {
156
+ align-self: flex-end;
157
+ background: var(--bg-user-msg);
158
+ border: 1px solid rgba(99, 102, 241, 0.2);
159
+ border-bottom-right-radius: 4px;
160
+ color: #c7d2fe;
161
+ }
162
+
163
+ .msg.bot {
164
+ align-self: flex-start;
165
+ background: var(--bg-bot-msg);
166
+ backdrop-filter: blur(10px);
167
+ border: 1px solid var(--border-color);
168
+ border-bottom-left-radius: 4px;
169
+ }
170
+
171
+ .msg.bot .label {
172
+ font-size: 11px;
173
+ font-weight: 600;
174
+ color: var(--accent-light);
175
+ margin-bottom: 6px;
176
+ letter-spacing: 0.3px;
177
+ text-transform: uppercase;
178
+ }
179
+
180
+ /* Markdown rendering inside bot messages */
181
+ .msg.bot h1, .msg.bot h2, .msg.bot h3 {
182
+ margin: 14px 0 6px;
183
+ font-weight: 600;
184
+ color: #e2e8f0;
185
+ }
186
+ .msg.bot h1 { font-size: 16px; }
187
+ .msg.bot h2 { font-size: 15px; }
188
+ .msg.bot h3 { font-size: 14px; }
189
+
190
+ .msg.bot p { margin: 4px 0; }
191
+
192
+ .msg.bot ul, .msg.bot ol {
193
+ margin: 6px 0 6px 20px;
194
+ }
195
+
196
+ .msg.bot li {
197
+ margin-bottom: 3px;
198
+ }
199
+
200
+ .msg.bot strong {
201
+ color: #c7d2fe;
202
+ font-weight: 600;
203
+ }
204
+
205
+ .msg.bot code {
206
+ background: rgba(99, 102, 241, 0.15);
207
+ padding: 2px 6px;
208
+ border-radius: 4px;
209
+ font-size: 13px;
210
+ font-family: 'SF Mono', 'Fira Code', monospace;
211
+ }
212
+
213
+ .msg.bot pre {
214
+ background: rgba(0, 0, 0, 0.3);
215
+ padding: 12px;
216
+ border-radius: 8px;
217
+ overflow-x: auto;
218
+ margin: 8px 0;
219
+ }
220
+
221
+ .msg.bot pre code {
222
+ background: none;
223
+ padding: 0;
224
+ }
225
+
226
+ .msg.bot table {
227
+ width: 100%;
228
+ border-collapse: collapse;
229
+ margin: 8px 0;
230
+ font-size: 13px;
231
+ }
232
+
233
+ .msg.bot th, .msg.bot td {
234
+ padding: 6px 10px;
235
+ border: 1px solid rgba(99, 102, 241, 0.15);
236
+ text-align: left;
237
+ }
238
+
239
+ .msg.bot th {
240
+ background: rgba(99, 102, 241, 0.1);
241
+ font-weight: 600;
242
+ color: #c7d2fe;
243
+ }
244
+
245
+ /* Typing indicator */
246
+ .typing-indicator {
247
+ display: flex;
248
+ gap: 5px;
249
+ padding: 4px 0;
250
+ }
251
+
252
+ .typing-indicator span {
253
+ width: 7px; height: 7px;
254
+ border-radius: 50%;
255
+ background: var(--accent-light);
256
+ opacity: 0.4;
257
+ animation: blink 1.4s ease-in-out infinite;
258
+ }
259
+
260
+ .typing-indicator span:nth-child(2) { animation-delay: 0.2s; }
261
+ .typing-indicator span:nth-child(3) { animation-delay: 0.4s; }
262
+
263
+ @keyframes blink {
264
+ 0%, 100% { opacity: 0.3; transform: scale(0.85); }
265
+ 50% { opacity: 1; transform: scale(1); }
266
+ }
267
+
268
+ /* ===== WELCOME SCREEN ===== */
269
+ #welcome {
270
+ flex: 1;
271
+ display: flex;
272
+ flex-direction: column;
273
+ align-items: center;
274
+ justify-content: center;
275
+ padding: 40px 24px;
276
+ text-align: center;
277
+ }
278
+
279
+ #welcome .icon {
280
+ width: 72px; height: 72px;
281
+ border-radius: 20px;
282
+ background: linear-gradient(135deg, var(--accent), #8b5cf6);
283
+ display: flex; align-items: center; justify-content: center;
284
+ font-size: 32px;
285
+ box-shadow: var(--shadow-glow);
286
+ margin-bottom: 20px;
287
+ }
288
+
289
+ #welcome h2 {
290
+ font-size: 22px;
291
+ font-weight: 600;
292
+ margin-bottom: 8px;
293
+ letter-spacing: -0.3px;
294
+ }
295
+
296
+ #welcome p {
297
+ font-size: 14px;
298
+ color: var(--text-secondary);
299
+ max-width: 380px;
300
+ line-height: 1.6;
301
+ }
302
+
303
+ .suggestions {
304
+ display: flex;
305
+ flex-wrap: wrap;
306
+ gap: 8px;
307
+ margin-top: 28px;
308
+ justify-content: center;
309
+ max-width: 520px;
310
+ }
311
+
312
+ .suggestions button {
313
+ background: var(--bg-bot-msg);
314
+ backdrop-filter: blur(10px);
315
+ border: 1px solid var(--border-color);
316
+ color: var(--text-secondary);
317
+ font-family: var(--font);
318
+ font-size: 13px;
319
+ padding: 9px 16px;
320
+ border-radius: var(--radius-pill);
321
+ cursor: pointer;
322
+ transition: var(--transition);
323
+ white-space: nowrap;
324
+ }
325
+
326
+ .suggestions button:hover {
327
+ border-color: var(--accent);
328
+ color: var(--accent-light);
329
+ background: rgba(99, 102, 241, 0.08);
330
+ transform: translateY(-1px);
331
+ }
332
+
333
+ /* ===== INPUT AREA ===== */
334
+ #input-area {
335
+ padding: 16px 24px 20px;
336
+ backdrop-filter: blur(20px);
337
+ background: rgba(10, 14, 26, 0.85);
338
+ border-top: 1px solid var(--border-color);
339
+ flex-shrink: 0;
340
+ }
341
+
342
+ .input-row {
343
+ display: flex;
344
+ align-items: flex-end;
345
+ gap: 10px;
346
+ background: var(--bg-input);
347
+ border: 1px solid var(--border-color);
348
+ border-radius: var(--radius-lg);
349
+ padding: 6px 6px 6px 16px;
350
+ transition: var(--transition);
351
+ }
352
+
353
+ .input-row:focus-within {
354
+ border-color: var(--border-focus);
355
+ box-shadow: var(--shadow-glow);
356
+ }
357
+
358
+ #input {
359
+ flex: 1;
360
+ background: transparent;
361
+ border: none;
362
+ outline: none;
363
+ color: var(--text-primary);
364
+ font-family: var(--font);
365
+ font-size: 14.5px;
366
+ resize: none;
367
+ max-height: 120px;
368
+ line-height: 1.5;
369
+ padding: 8px 0;
370
+ }
371
+
372
+ #input::placeholder {
373
+ color: var(--text-muted);
374
+ }
375
+
376
+ #send-btn {
377
+ width: 40px; height: 40px;
378
+ border-radius: var(--radius);
379
+ border: none;
380
+ background: linear-gradient(135deg, var(--accent), #8b5cf6);
381
+ color: #fff;
382
+ cursor: pointer;
383
+ display: flex; align-items: center; justify-content: center;
384
+ flex-shrink: 0;
385
+ transition: var(--transition);
386
+ }
387
+
388
+ #send-btn:hover:not(:disabled) {
389
+ transform: scale(1.05);
390
+ box-shadow: var(--shadow-glow);
391
+ }
392
+
393
+ #send-btn:disabled {
394
+ opacity: 0.35;
395
+ cursor: default;
396
+ }
397
+
398
+ #send-btn svg {
399
+ width: 18px; height: 18px;
400
+ }
401
+
402
+ .hint {
403
+ font-size: 11px;
404
+ text-align: center;
405
+ color: var(--text-muted);
406
+ margin-top: 8px;
407
+ }
408
+
409
+ /* ===== RESPONSIVE ===== */
410
+ @media (max-width: 640px) {
411
+ header { padding: 12px 16px; }
412
+ #messages { padding: 14px 16px; }
413
+ #input-area { padding: 12px 16px 16px; }
414
+ .msg { max-width: 92%; font-size: 14px; }
415
+ #welcome h2 { font-size: 19px; }
416
+ .suggestions { flex-direction: column; align-items: center; }
417
+ }
core/gradio/user_gradio.py CHANGED
@@ -17,8 +17,8 @@ if str(REPO_ROOT) not in sys.path:
17
 
18
  @dataclass
19
  class GradioConfig:
20
- server_host: str = "127.0.0.1"
21
- server_port: int = 7860
22
 
23
 
24
  def _load_env() -> None:
@@ -219,5 +219,6 @@ if __name__ == "__main__":
219
  print(f"{'='*60}\n")
220
  demo.launch(
221
  server_name=GRADIO_CFG.server_host,
222
- server_port=GRADIO_CFG.server_port
 
223
  )
 
17
 
18
  @dataclass
19
  class GradioConfig:
20
+ server_host: str = os.getenv("GRADIO_HOST", "127.0.0.1")
21
+ server_port: int = int(os.getenv("GRADIO_PORT", "7860"))
22
 
23
 
24
  def _load_env() -> None:
 
219
  print(f"{'='*60}\n")
220
  demo.launch(
221
  server_name=GRADIO_CFG.server_host,
222
+ server_port=GRADIO_CFG.server_port,
223
+ share=True,
224
  )
core/preprocessing/docling_processor.py CHANGED
@@ -107,6 +107,11 @@ class DoclingProcessor:
107
  return None
108
 
109
  def parse_directory(self, source_dir: str) -> dict:
 
 
 
 
 
110
  self.logger.info(f"Found {len(pdf_files)} PDF files in {source_dir}")
111
 
112
  results = {"total": len(pdf_files), "parsed": 0, "skipped": 0, "errors": 0}
 
107
  return None
108
 
109
  def parse_directory(self, source_dir: str) -> dict:
110
+ source_path = Path(source_dir)
111
+ if not source_path.exists():
112
+ raise FileNotFoundError(f"Source directory not found: {source_dir}")
113
+
114
+ pdf_files = sorted(source_path.rglob("*.pdf"))
115
  self.logger.info(f"Found {len(pdf_files)} PDF files in {source_dir}")
116
 
117
  results = {"total": len(pdf_files), "parsed": 0, "skipped": 0, "errors": 0}
core/rag/embedding_model.py CHANGED
@@ -14,8 +14,8 @@ logger = logging.getLogger(__name__)
14
  @dataclass
15
  class EmbeddingConfig:
16
  api_base_url: str = "https://api.siliconflow.com/v1"
17
- model: str = "Qwen/Qwen3-Embedding-4B"
18
- dimension: int = 2048
19
  batch_size: int = 16
20
 
21
 
 
14
  @dataclass
15
  class EmbeddingConfig:
16
  api_base_url: str = "https://api.siliconflow.com/v1"
17
+ model: str = "Qwen/Qwen3-Embedding-8B"
18
+ dimension: int = 4096
19
  batch_size: int = 16
20
 
21
 
docker-compose.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ backend:
3
+ build: .
4
+ ports:
5
+ - "8000:8000"
6
+ volumes:
7
+ - ./data:/app/data # Mount data (ChromaDB + markdown files)
8
+ - ./.env:/app/.env # Mount API keys
9
+ environment:
10
+ - API_HOST=0.0.0.0
11
+ - API_PORT=8000
12
+ - FRONTEND_API_KEY=${FRONTEND_API_KEY} # API key used to authenticate requests
13
+ - RATE_LIMIT_MAX=30 # Max requests per minute per IP
14
+ restart: unless-stopped
evaluation/eval_utils.py CHANGED
@@ -48,12 +48,12 @@ def init_rag() -> tuple[RAGGenerator, QwenEmbeddings, OpenAI]:
48
  retriever = Retriever(vector_db=db)
49
  rag = RAGGenerator(retriever=retriever)
50
 
51
- # Initialize LLM client
52
- api_key = os.getenv("SILICONFLOW_API_KEY", "").strip()
53
  if not api_key:
54
- raise ValueError("Missing SILICONFLOW_API_KEY")
55
 
56
- llm_client = OpenAI(api_key=api_key, base_url="https://api.siliconflow.com/v1", timeout=60.0)
57
  return rag, embeddings, llm_client
58
 
59
 
@@ -61,7 +61,7 @@ def generate_answers(
61
  rag: RAGGenerator,
62
  questions: list,
63
  llm_client: OpenAI,
64
- llm_model: str = "nex-agi/DeepSeek-V3.1-Nex-N1",
65
  retrieval_mode: str = "hybrid_rerank",
66
  max_workers: int = 8,
67
  ) -> tuple[list, list]:
 
48
  retriever = Retriever(vector_db=db)
49
  rag = RAGGenerator(retriever=retriever)
50
 
51
+ # Initialize LLM client (same as production: Groq API)
52
+ api_key = os.getenv("GROQ_API_KEY", "").strip()
53
  if not api_key:
54
+ raise ValueError("Missing GROQ_API_KEY")
55
 
56
+ llm_client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1", timeout=60.0)
57
  return rag, embeddings, llm_client
58
 
59
 
 
61
  rag: RAGGenerator,
62
  questions: list,
63
  llm_client: OpenAI,
64
+ llm_model: str = "qwen/qwen3-32b",
65
  retrieval_mode: str = "hybrid_rerank",
66
  max_workers: int = 8,
67
  ) -> tuple[list, list]:
evaluation/ragas_eval.py CHANGED
@@ -24,7 +24,7 @@ from evaluation.eval_utils import load_csv_data, init_rag, generate_answers
24
  # Configuration
25
  CSV_PATH = "data/data.csv"
26
  OUTPUT_DIR = "evaluation/results"
27
- LLM_MODEL = os.getenv("EVAL_LLM_MODEL", "nex-agi/DeepSeek-V3.1-Nex-N1")
28
  API_BASE = "https://api.siliconflow.com/v1"
29
 
30
 
 
24
  # Configuration
25
  CSV_PATH = "data/data.csv"
26
  OUTPUT_DIR = "evaluation/results"
27
+ LLM_MODEL = os.getenv("EVAL_LLM_MODEL", "qwen/qwen3-32b")
28
  API_BASE = "https://api.siliconflow.com/v1"
29
 
30
 
requirements.txt CHANGED
@@ -19,6 +19,8 @@ numpy==2.2.6
19
 
20
  # UI
21
  gradio==6.2.0
 
 
22
 
23
  # Evaluation
24
  ragas==0.4.2
 
19
 
20
  # UI
21
  gradio==6.2.0
22
+ fastapi==0.115.12
23
+ uvicorn==0.34.2
24
 
25
  # Evaluation
26
  ragas==0.4.2
setup.bat CHANGED
@@ -31,5 +31,5 @@ if not exist ".env" (
31
 
32
  echo.
33
  echo Setup complete!
34
- echo Run: venv\Scripts\activate ^& python scripts\run_app.py
35
  pause
 
31
 
32
  echo.
33
  echo Setup complete!
34
+ echo Run: venv\Scripts\activate ^& python core/api/server.py
35
  pause
setup.sh CHANGED
@@ -35,4 +35,4 @@ fi
35
 
36
  echo ""
37
  echo "Setup complete!"
38
- echo "Run: source venv/bin/activate && python scripts/run_app.py"
 
35
 
36
  echo ""
37
  echo "Setup complete!"
38
+ echo "Run: source venv/bin/activate && python core/api/server.py"