wessamelden committed on
Commit
4c09ca7
·
1 Parent(s): f2c87c6

add API key protection

Browse files
.env.example CHANGED
@@ -1,7 +1,9 @@
1
- # โ”€โ”€ Groq LLM โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
2
- # احصل على مفتاح مجاني من: https://console.groq.com/keys
 
 
 
3
  GROQ_API_KEY=gsk_your_groq_api_key_here
4
- # النماذج المتاحة: llama-3.3-70b-versatile, llama-3.1-8b-instant, mixtral-8x7b-32768
5
  GROQ_MODEL=llama-3.3-70b-versatile
6
 
7
  # โ”€โ”€ ุงู„ุงุณุชุฑุฌุงุน โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
@@ -11,6 +13,12 @@ CHROMA_COLLECTION=rag_docs
11
  TOP_K=8
12
  MAX_CONTEXT_CHARS=10000
13
 
 
 
 
 
 
 
14
  # โ”€โ”€ ุฐุงูƒุฑุฉ ุงู„ู…ุญุงุฏุซุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
15
  MAX_TURNS=6
16
  MAX_SESSIONS=200
@@ -22,4 +30,6 @@ CHUNK_SIZE=1600
22
  CHUNK_OVERLAP=200
23
 
24
  # โ”€โ”€ ุงู„ุดุจูƒุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
25
- ALLOWED_ORIGINS=*
 
 
 
1
+ # โ”€โ”€ ู†ู…ูˆุฐุฌ ุงู„ู„ุบุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
2
+ OLLAMA_MODEL=gemma3
3
+ OLLAMA_URL=http://127.0.0.1:11434/api/chat
4
+
5
+ # โ”€โ”€ Groq LLM (Reranker & Chat) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
6
  GROQ_API_KEY=gsk_your_groq_api_key_here
 
7
  GROQ_MODEL=llama-3.3-70b-versatile
8
 
9
  # โ”€โ”€ ุงู„ุงุณุชุฑุฌุงุน โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
13
  TOP_K=8
14
  MAX_CONTEXT_CHARS=10000
15
 
16
+ # โ”€โ”€ Reranker (HuggingFace) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
17
+ # احصل على token مجاني من: https://huggingface.co/settings/tokens
18
+ # HF_API_TOKEN=hf_your_token_here
19
+ # RERANKER_MODEL=Qwen/Qwen3-Reranker-0.6B
20
+ # RERANKER_CONCURRENCY=4
21
+
22
  # โ”€โ”€ ุฐุงูƒุฑุฉ ุงู„ู…ุญุงุฏุซุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
23
  MAX_TURNS=6
24
  MAX_SESSIONS=200
 
30
  CHUNK_OVERLAP=200
31
 
32
  # โ”€โ”€ ุงู„ุดุจูƒุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
33
+ # في التطوير: *
34
+ # في الإنتاج: ضع عنوان تطبيق Flutter أو رابط الخادم
35
+ ALLOWED_ORIGINS=*
README.md DELETED
@@ -1,31 +0,0 @@
1
- ---
2
- title: ASU RAG Chatbot
3
- emoji: 🎓
4
- colorFrom: blue
5
- colorTo: green
6
- sdk: docker
7
- app_port: 7860
8
- pinned: false
9
- ---
10
-
11
- # ASU RAG Chatbot
12
-
13
- مساعد أكاديمي ذكي لطلاب كلية العلوم - جامعة عين شمس
14
-
15
- ## Setup
16
-
17
- Set the following secrets in your HuggingFace Space settings:
18
-
19
- | Secret | Description |
20
- |--------|-------------|
21
- | `GROQ_API_KEY` | Get free at [console.groq.com](https://console.groq.com/keys) |
22
- | `HF_API_TOKEN` | Get free at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) |
23
-
24
- ## API
25
-
26
- | Endpoint | Description |
27
- |----------|-------------|
28
- | `POST /chat` | Streaming chat (SSE) |
29
- | `GET /health` | Health check |
30
- | `POST /retrieve` | Debug: raw retrieval results |
31
- | `DELETE /session/{id}` | Clear conversation history |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/__init__.py DELETED
File without changes
app/api/auth.py DELETED
@@ -1,16 +0,0 @@
1
- from fastapi import Header, HTTPException, status
2
- from app.core.config import settings
3
-
4
-
5
- async def require_api_key(x_api_key: str = Header(default="")):
6
- """
7
- تحقق من مفتاح الـ API في header كل طلب.
8
- إذا كان API_SECRET_KEY فارغاً في الـ env → الحماية معطّلة (للتطوير).
9
- """
10
- if not settings.api_secret_key:
11
- return # no key configured → open (local dev only)
12
- if x_api_key != settings.api_secret_key:
13
- raise HTTPException(
14
- status_code=status.HTTP_401_UNAUTHORIZED,
15
- detail="Invalid or missing API key",
16
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/api/routes_chat.py CHANGED
@@ -1,12 +1,13 @@
1
  import uuid
2
  import json
3
  import time
4
- from fastapi import APIRouter, HTTPException
5
  from fastapi.responses import StreamingResponse
6
  from pydantic import BaseModel, Field
7
 
8
  from app.memory import memory
9
  from app.pipeline import chat_pipeline
 
10
  from app.llm.groq_client import stream_response
11
  from app.core.logging_setup import get_logger
12
 
 
1
  import uuid
2
  import json
3
  import time
4
+ from fastapi import APIRouter
5
  from fastapi.responses import StreamingResponse
6
  from pydantic import BaseModel, Field
7
 
8
  from app.memory import memory
9
  from app.pipeline import chat_pipeline
10
+ # from app.llm.ollama_client import stream_response
11
  from app.llm.groq_client import stream_response
12
  from app.core.logging_setup import get_logger
13
 
app/api/routes_health.py CHANGED
@@ -1,13 +1,6 @@
1
- """
2
- app/api/routes_health.py
3
- =========================
4
- Endpoints للمراقبة والتشخيص.
5
-
6
- /health → هل الـ server يعمل؟ هل Groq متصل؟
7
- /retrieve → أداة debug للتحقق من جودة الاسترجاع
8
- """
9
 
10
  from groq import AsyncGroq
 
11
  from fastapi import APIRouter
12
  from pydantic import BaseModel
13
 
@@ -18,12 +11,30 @@ from app.memory import memory
18
 
19
  router = APIRouter()
20
 
 
21
  _groq_client = AsyncGroq(api_key=settings.groq_api_key)
22
 
23
 
24
  @router.get("/health")
25
  async def health():
 
 
 
 
 
 
 
 
 
26
  retriever = get_retriever()
 
 
 
 
 
 
 
 
27
  groq_ok = False
28
  try:
29
  await _groq_client.chat.completions.create(
@@ -36,6 +47,9 @@ async def health():
36
  pass
37
 
38
  return {
 
 
 
39
  "status": "ok" if groq_ok else "error",
40
  "groq_connected": groq_ok,
41
  "model": settings.groq_model,
@@ -56,4 +70,4 @@ async def retrieve(req: RetrieveRequest):
56
  retriever = get_retriever()
57
  k = req.top_k or settings.top_k
58
  chunks = retriever.search(req.question, top_k=k)
59
- return {"chunks": chunks}
 
 
 
 
 
 
 
 
 
1
 
2
  from groq import AsyncGroq
3
+ # import httpx
4
  from fastapi import APIRouter
5
  from pydantic import BaseModel
6
 
 
11
 
12
  router = APIRouter()
13
 
14
+ # _OLLAMA_BASE_URL = settings.ollama_url.split("/api/")[0]
15
  _groq_client = AsyncGroq(api_key=settings.groq_api_key)
16
 
17
 
18
  @router.get("/health")
19
  async def health():
20
+ """
21
+ تحقق من حالة النظام.
22
+
23
+ ูŠูุนูŠุฏ:
24
+ status: "ok" أو "error"
25
+ ollama_connected: هل Ollama يستجيب؟
26
+ chunks_indexed: عدد chunks في قاعدة البيانات
27
+ sessions_active: عدد الجلسات النشطة في الذاكرة
28
+ """
29
  retriever = get_retriever()
30
+ # ollama_ok = False
31
+ # try:
32
+ # async with httpx.AsyncClient(timeout=httpx.Timeout(5.0)) as client:
33
+ # r = await client.get(f"{_OLLAMA_BASE_URL}/api/tags")
34
+ # ollama_ok = r.status_code == 200
35
+ # except (httpx.HTTPError, OSError):
36
+ # pass
37
+
38
  groq_ok = False
39
  try:
40
  await _groq_client.chat.completions.create(
 
47
  pass
48
 
49
  return {
50
+ # "status": "ok" if ollama_ok else "error",
51
+ # "ollama_connected": ollama_ok,
52
+ # "model": settings.ollama_model,
53
  "status": "ok" if groq_ok else "error",
54
  "groq_connected": groq_ok,
55
  "model": settings.groq_model,
 
70
  retriever = get_retriever()
71
  k = req.top_k or settings.top_k
72
  chunks = retriever.search(req.question, top_k=k)
73
+ return {"chunks": chunks}
app/core/config.py CHANGED
@@ -5,18 +5,26 @@ load_dotenv()
5
 
6
 
7
  class Settings:
8
- # โ”€โ”€ ู†ู…ูˆุฐุฌ ุงู„ู„ุบุฉ (Groq) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
 
 
 
9
  groq_api_key: str = os.getenv("GROQ_API_KEY", "")
10
  groq_model: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
11
 
12
  # โ”€โ”€ ุงู„ุงุณุชุฑุฌุงุน ูˆุงู„ุชุถู…ูŠู† โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
13
- # ู†ู…ูˆุฐุฌ ุงู„ุชุถู…ูŠู† โ€” ูŠุฌุจ ุฃู† ูŠูƒูˆู† ู†ูุณู‡ ููŠ ุงู„ุงุณุชูŠุนุงุจ ูˆุงู„ุงุณุชุฑุฌุงุน ุฏุงุฆู…ุงู‹
14
  embed_model: str = os.getenv("EMBED_MODEL", "paraphrase-multilingual-mpnet-base-v2")
15
  chroma_path: str = os.getenv("CHROMA_PATH", "vectorstore")
16
  chroma_collection: str = os.getenv("CHROMA_COLLECTION", "rag_docs")
17
  top_k: int = min(int(os.getenv("TOP_K", "8")), 8)
18
  max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "10000"))
19
 
 
 
 
 
 
20
  # โ”€โ”€ ุฐุงูƒุฑุฉ ุงู„ู…ุญุงุฏุซุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
21
  max_turns: int = int(os.getenv("MAX_TURNS", "6"))
22
  max_sessions: int = int(os.getenv("MAX_SESSIONS", "200"))
@@ -36,6 +44,8 @@ class Settings:
36
  ]
37
 
38
  # โ”€โ”€ Timeouts โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
 
39
  keepalive_interval: int = 20 # ุซุงู†ูŠุฉ โ€” heartbeat ู„ู„ู€ SSE
40
 
41
 
 
5
 
6
 
7
  class Settings:
8
+ # โ”€โ”€ ู†ู…ูˆุฐุฌ ุงู„ู„ุบุฉ (Ollama) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
9
+ ollama_model: str = os.getenv("OLLAMA_MODEL", "gemma3")
10
+ ollama_url: str = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434/api/chat")
11
+
12
+ # โ”€โ”€ ู†ู…ูˆุฐุฌ ุงู„ู„ุบุฉ (Groq) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
13
  groq_api_key: str = os.getenv("GROQ_API_KEY", "")
14
  groq_model: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
15
 
16
  # โ”€โ”€ ุงู„ุงุณุชุฑุฌุงุน ูˆุงู„ุชุถู…ูŠู† โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
17
  embed_model: str = os.getenv("EMBED_MODEL", "paraphrase-multilingual-mpnet-base-v2")
18
  chroma_path: str = os.getenv("CHROMA_PATH", "vectorstore")
19
  chroma_collection: str = os.getenv("CHROMA_COLLECTION", "rag_docs")
20
  top_k: int = min(int(os.getenv("TOP_K", "8")), 8)
21
  max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "10000"))
22
 
23
+ # โ”€โ”€ Reranker (HuggingFace) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
24
+ #hf_api_token: str = os.getenv("HF_API_TOKEN", "")
25
+ #reranker_model: str = os.getenv("RERANKER_MODEL", "Qwen/Qwen3-Reranker-0.6B")
26
+ #reranker_concurrency: int = int(os.getenv("RERANKER_CONCURRENCY", "4"))
27
+
28
  # โ”€โ”€ ุฐุงูƒุฑุฉ ุงู„ู…ุญุงุฏุซุฉ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
29
  max_turns: int = int(os.getenv("MAX_TURNS", "6"))
30
  max_sessions: int = int(os.getenv("MAX_SESSIONS", "200"))
 
44
  ]
45
 
46
  # โ”€โ”€ Timeouts โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
47
+ # None = لا timeout (الاستنتاج على CPU قد يأخذ 200+ ثانية)
48
+ ollama_timeout = None
49
  keepalive_interval: int = 20 # ุซุงู†ูŠุฉ โ€” heartbeat ู„ู„ู€ SSE
50
 
51
 
app/llm/ollama_client.py CHANGED
@@ -16,25 +16,7 @@ async def stream_response(
16
  session_id: str,
17
  original_question: str,
18
  ):
19
- """
20
- ุจุซ ุงู„ุฑุฏ ู…ู† Ollama ู…ุน ุญูุธ ุงู„ู…ุญุงุฏุซุฉ ููŠ ุงู„ุฐุงูƒุฑุฉ.
21
-
22
- ูƒูŠู ูŠุนู…ู„ ุงู„ู€ StreamingุŸ
23
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
24
- ุจุฏู„ุงู‹ ู…ู† ุงู„ุงู†ุชุธุงุฑ ุญุชู‰ ุชูƒุชู…ู„ ุงู„ุฅุฌุงุจุฉ ูƒุงู…ู„ุงู‹ (ู‚ุฏ ูŠุณุชุบุฑู‚ 200 ุซุงู†ูŠุฉ)ุŒ
25
- ู†ูุชุญ ุงุชุตุงู„ุงู‹ ู…ุณุชู…ุฑุงู‹ ูˆู†ูุฑุณู„ ูƒู„ ูƒู„ู…ุฉ ููˆุฑ ุฅู†ุชุงุฌู‡ุง.
26
- ุงู„ู…ุณุชุฎุฏู… ูŠุฑู‰ ุงู„ุฑุฏ ูŠุธู‡ุฑ ุชุฏุฑูŠุฌูŠุงู‹ ูƒุฃู† ุดุฎุตุงู‹ ูŠูƒุชุจ.
27
-
28
- Heartbeat (ู†ุจุถุฉ ุงู„ู‚ู„ุจ):
29
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
30
- ุฅุฐุง ู„ู… ูŠุฃุชู ุชูˆูƒู† ู„ู€ 20 ุซุงู†ูŠุฉุŒ ู†ูุฑุณู„ ู…ุณุงูุฉ ุตุบูŠุฑุฉ (zero-width space).
31
- ู‡ุฐุง ูŠู…ู†ุน ุงู„ู…ุชุตูุญ ุฃูˆ ุงู„ุดุจูƒุฉ ู…ู† ุงุนุชุจุงุฑ ุงู„ุงุชุตุงู„ "ู…ุงุช" ูˆู‚ุทุนู‡.
32
-
33
- ุงู„ุญูุธ ููŠ ุงู„ุฐุงูƒุฑุฉ:
34
- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
35
- ุนู†ุฏ ุงูƒุชู…ุงู„ ุงู„ุฅุฌุงุจุฉุŒ ู†ุญูุธ ุงู„ุณุคุงู„ ูˆุงู„ุฅุฌุงุจุฉ ููŠ ConversationMemory
36
- ุญุชู‰ ูŠุนู…ู„ ุงู„ู€ follow-up ููŠ ุงู„ุฑุณุงุฆู„ ุงู„ุชุงู„ูŠุฉ.
37
- """
38
  full_answer = ""
39
  stream_completed = False
40
  logger.info("ุจุฏุก ุชูˆู„ูŠุฏ ุงู„ุฅุฌุงุจุฉ | session=%s", session_id)
@@ -120,12 +102,7 @@ async def stream_response(
120
 
121
 
122
  async def warmup_model() -> bool:
123
- """
124
- ุญู…ู‘ู„ ุงู„ู†ู…ูˆุฐุฌ ููŠ ุฐุงูƒุฑุฉ Ollama ุนู†ุฏ ุจุฏุก ุงู„ุชุทุจูŠู‚.
125
- ูŠู…ู†ุน ุงู„ุชุฃุฎูŠุฑ ุงู„ูƒุจูŠุฑ ููŠ ุฃูˆู„ ุทู„ุจ.
126
 
127
- ูŠูุนุงุฏ True ุฅุฐุง ู†ุฌุญุŒ False ุฅุฐุง ูุดู„.
128
- """
129
  try:
130
  async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
131
  await client.post(
 
16
  session_id: str,
17
  original_question: str,
18
  ):
19
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  full_answer = ""
21
  stream_completed = False
22
  logger.info("ุจุฏุก ุชูˆู„ูŠุฏ ุงู„ุฅุฌุงุจุฉ | session=%s", session_id)
 
102
 
103
 
104
  async def warmup_model() -> bool:
 
 
 
105
 
 
 
106
  try:
107
  async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
108
  await client.post(
app/memory/conversation_memory.py CHANGED
@@ -1,20 +1,14 @@
1
- """
2
- memory.py — Production conversation store with TTL eviction.
3
- Keeps the last N turns per session so "why?" follow-ups work correctly.
4
- Evicts sessions older than TTL and caps total sessions to prevent OOM.
5
- """
6
-
7
  import time
8
  import threading
9
  from collections import OrderedDict, deque
10
  from dataclasses import dataclass
11
  from typing import Literal
12
- import os
13
 
14
  from app.core.config import settings
15
  from app.core.logging_setup import get_logger
16
 
17
  logger = get_logger(__name__)
 
18
  @dataclass
19
  class Turn:
20
  role: Literal["user", "assistant"]
@@ -108,13 +102,8 @@ class ConversationMemory:
108
  def session_count(self) -> int:
109
  return len(self._sessions)
110
 
111
-
112
- # Global singleton shared across all requests
113
- MAX_TURNS = int(os.getenv("MAX_TURNS", "6"))
114
- MAX_SESSIONS = int(os.getenv("MAX_SESSIONS", "200"))
115
- SESSION_TTL = int(os.getenv("SESSION_TTL", "3600"))
116
  memory = ConversationMemory(
117
- max_turns=MAX_TURNS,
118
- max_sessions=MAX_SESSIONS,
119
- ttl_seconds=SESSION_TTL,
120
  )
 
 
 
 
 
 
 
1
  import time
2
  import threading
3
  from collections import OrderedDict, deque
4
  from dataclasses import dataclass
5
  from typing import Literal
 
6
 
7
  from app.core.config import settings
8
  from app.core.logging_setup import get_logger
9
 
10
  logger = get_logger(__name__)
11
+
12
  @dataclass
13
  class Turn:
14
  role: Literal["user", "assistant"]
 
102
  def session_count(self) -> int:
103
  return len(self._sessions)
104
 
 
 
 
 
 
105
  memory = ConversationMemory(
106
+ max_turns=settings.max_turns,
107
+ max_sessions=settings.max_sessions,
108
+ ttl_seconds=settings.session_ttl,
109
  )
app/pipeline/chat_pipeline.py CHANGED
@@ -5,7 +5,9 @@ from langdetect import detect as detect_lang, LangDetectException
5
 
6
  from app.core.config import settings
7
  from app.core.logging_setup import get_logger
8
- from app.retrieval import get_retriever, rerank_chunks
 
 
9
  from app.pipeline.query_handler import is_followup_question, rewrite_query
10
  from app.pipeline.context_builder import build_context, extract_sources
11
  from app.pipeline.prompt_builder import build_system_prompt
@@ -37,17 +39,6 @@ async def run(
37
  session_id: str,
38
  history: list[dict],
39
  ) -> PipelineResult:
40
- """
41
- ู†ูู‘ุฐ pipeline ูƒุงู…ู„ ู„ุณุคุงู„ ูˆุงุญุฏ.
42
-
43
- ุงู„ู…ุฏุฎู„ุงุช:
44
- question: ู†ุต ุงู„ุณุคุงู„ (ุจุนุฏ trim)
45
- session_id: ู…ุนุฑู‘ู ุงู„ุฌู„ุณุฉ
46
- history: ุชุงุฑูŠุฎ ุงู„ู…ุญุงุฏุซุฉ ู…ู† ConversationMemory
47
-
48
- ุงู„ู…ุฎุฑุฌ:
49
- PipelineResult ุฌุงู‡ุฒ ู„ู„ุฅุฑุณุงู„ ู„ู„ู€ LLM
50
- """
51
  t_start = time.time()
52
  lang = _detect_language(question)
53
 
@@ -65,9 +56,10 @@ async def run(
65
  )
66
 
67
  # โ”€โ”€ ุงู„ุฎุทูˆุฉ 4: ุฅุนุงุฏุฉ ุงู„ุชุฑุชูŠุจ ุจุงู„ู€ Reranker โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
68
- # ุฅุฐุง ู„ู… ูŠูˆุฌุฏ HF_API_TOKEN โ†’ ูŠูุนุงุฏ ุงู„ุชุฑุชูŠุจ ุงู„ุฃุตู„ูŠ ุจุฏูˆู† ุชุบูŠูŠุฑ (graceful degradation)
69
  chunks = await rerank_chunks(search_query, chunks, top_k=5, lang=lang)
70
-
 
71
  # โ”€โ”€ ุงู„ุฎุทูˆุฉ 5: ุจู†ุงุก ุงู„ุณูŠุงู‚ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
72
  context = build_context(chunks)
73
  sources = extract_sources(chunks)
 
5
 
6
  from app.core.config import settings
7
  from app.core.logging_setup import get_logger
8
+ from app.retrieval import get_retriever
9
+ # from app.retrieval import get_retriever # rerank_chunks DISABLED: using Groq API
10
+ from app.retrieval.reranker import rerank_chunks
11
  from app.pipeline.query_handler import is_followup_question, rewrite_query
12
  from app.pipeline.context_builder import build_context, extract_sources
13
  from app.pipeline.prompt_builder import build_system_prompt
 
39
  session_id: str,
40
  history: list[dict],
41
  ) -> PipelineResult:
 
 
 
 
 
 
 
 
 
 
 
42
  t_start = time.time()
43
  lang = _detect_language(question)
44
 
 
56
  )
57
 
58
  # โ”€โ”€ ุงู„ุฎุทูˆุฉ 4: ุฅุนุงุฏุฉ ุงู„ุชุฑุชูŠุจ ุจุงู„ู€ Reranker โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
59
+ # DISABLED: local HuggingFace reranker replaced by Groq API before discussion day.
60
  chunks = await rerank_chunks(search_query, chunks, top_k=5, lang=lang)
61
+ # chunks = chunks[:5] # fallback: take top 5 from RRF order
62
+
63
  # โ”€โ”€ ุงู„ุฎุทูˆุฉ 5: ุจู†ุงุก ุงู„ุณูŠุงู‚ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
64
  context = build_context(chunks)
65
  sources = extract_sources(chunks)
app/pipeline/context_builder.py CHANGED
@@ -2,19 +2,7 @@ from app.core.config import settings
2
 
3
 
4
  def format_chunk(index: int, chunk: dict) -> str:
5
- """
6
- ู†ุณู‘ู‚ chunk ูˆุงุญุฏ ู…ุน ุชุฑูˆูŠุณุฉ ุชุญุชูˆูŠ ุนู„ู‰ ู…ุนู„ูˆู…ุงุช ุงู„ุณูŠุงู‚.
7
-
8
- ุงู„ู…ุฏุฎู„:
9
- index: ุฑู‚ู… ุงู„ู€ chunk (0-based)
10
- chunk: dict ูŠุญุชูˆูŠ ุนู„ู‰ "text" ูˆ "metadata"
11
-
12
- ุงู„ู…ุฎุฑุฌ:
13
- ู†ุต ู…ู†ุณู‘ู‚ ู…ุน ุชุฑูˆูŠุณุฉ ุจูŠู† ุฃู‚ูˆุงุณ ู…ุฑุจุนุฉ
14
- ู…ุซุงู„:
15
- [ู…ู‚ุชุทู 1 โ€” ุงู„ุณูŠุงู‚: ุจุฑู†ุงู…ุฌ ุงู„ุฑูŠุงุถูŠุงุช โ€” ุงู„ู…ุณุชูˆู‰ 3 โ€” ุงู„ูุตู„: ุงู„ุฃูˆู„]
16
- ... ู†ุต ุงู„ู€ chunk ...
17
- """
18
  meta = chunk.get("metadata", {})
19
  article = meta.get("article_number", "")
20
  breadcrumb = (
 
2
 
3
 
4
  def format_chunk(index: int, chunk: dict) -> str:
5
+
 
 
 
 
 
 
 
 
 
 
 
 
6
  meta = chunk.get("metadata", {})
7
  article = meta.get("article_number", "")
8
  breadcrumb = (
app/pipeline/prompt_builder.py CHANGED
@@ -7,15 +7,7 @@ _PROMPTS_DIR = Path(__file__).resolve().parent.parent.parent / "prompts"
7
 
8
 
9
  def build_system_prompt(language: str) -> str:
10
- """
11
- ุงุฑุฌุน ุงู„ู€ system prompt ุงู„ู…ู†ุงุณุจ ุญุณุจ ุงู„ู„ุบุฉ.
12
 
13
- ุงู„ู…ุนุงู…ู„ุงุช:
14
- language: "ar" ู„ู„ุนุฑุจูŠุฉุŒ "en" ู„ู„ุฅู†ุฌู„ูŠุฒูŠุฉ
15
-
16
- ุงู„ุฅุฑุฌุงุน:
17
- ู†ุต ุงู„ู€ prompt ุงู„ูƒุงู…ู„
18
- """
19
  filename = "system_ar.txt" if language == "ar" else "system_en.txt"
20
  prompt_path = _PROMPTS_DIR / filename
21
 
 
7
 
8
 
9
  def build_system_prompt(language: str) -> str:
 
 
10
 
 
 
 
 
 
 
11
  filename = "system_ar.txt" if language == "ar" else "system_en.txt"
12
  prompt_path = _PROMPTS_DIR / filename
13
 
app/pipeline/query_handler.py CHANGED
@@ -1,25 +1,14 @@
1
  import json
2
- from groq import AsyncGroq
3
  from app.core.config import settings
4
  from app.core.logging_setup import get_logger
5
 
6
  logger = get_logger(__name__)
7
 
8
- _client = AsyncGroq(api_key=settings.groq_api_key)
9
 
10
 
11
  def is_followup_question(question: str) -> bool:
12
- """
13
- ุงูƒุดู ู…ุง ุฅุฐุง ูƒุงู† ุงู„ุณุคุงู„ ูŠุนุชู…ุฏ ุนู„ู‰ context ุณุงุจู‚.
14
-
15
- ูŠุนุชู…ุฏ ุนู„ู‰ 4 ุฅุดุงุฑุงุช:
16
- - ู‚ุตูŠุฑ (โ‰ค8 ูƒู„ู…ุงุช)
17
- - ูŠุญุชูˆูŠ ุนู„ู‰ ูƒู„ู…ุฉ ุงุณุชูู‡ุงู… ุบูŠุฑ ู…ุญุฏุฏุฉ
18
- - ูŠุญุชูˆูŠ ุนู„ู‰ ุถู…ูŠุฑ ุฅุดุงุฑูŠ (ุฏู‡/ู‡ุฐุง)
19
- - ูŠุจุฏุฃ ุจุญุฑู ุนุทู (ูˆ/ู/ู„ูƒู†)
20
-
21
- ุฅุฐุง ุชูˆูู‘ุฑุช ุฅุดุงุฑุชุงู† ุฃูˆ ุฃูƒุซุฑ โ†’ ุณุคุงู„ ู…ุชุงุจูุน.
22
- """
23
  followup_keywords = [
24
  "ู„ู…ุงุฐุง", "ูƒูŠู", "ู…ุงุฐุง", "ูˆุถุญ", "ุงุดุฑุญ", "ูŠุนู†ูŠ", "ุทูŠุจ", "ูˆุฅูŠู‡",
25
  "why", "how", "what do you mean", "explain", "elaborate",
@@ -37,11 +26,6 @@ def is_followup_question(question: str) -> bool:
37
 
38
 
39
  async def rewrite_query(question: str, history: list[dict]) -> str:
40
- """
41
- ุฃุนุฏ ุตูŠุงุบุฉ ุงู„ุณุคุงู„ ู„ูŠูƒูˆู† ู…ุณุชู‚ู„ุงู‹ ุจุงุณุชุฎุฏุงู… Groq.
42
-
43
- ุฅุฐุง ูุดู„ ุงู„ู€ LLM ู„ุฃูŠ ุณุจุจ โ†’ ูŠูุนุงุฏ ุงู„ุณุคุงู„ ุงู„ุฃุตู„ูŠ ุจุฏูˆู† ุชุบูŠูŠุฑ.
44
- """
45
  if not history:
46
  return question
47
 
@@ -55,16 +39,20 @@ async def rewrite_query(question: str, history: list[dict]) -> str:
55
  )
56
 
57
  try:
58
- response = await _client.chat.completions.create(
59
- model=settings.groq_model,
60
- messages=[{"role": "user", "content": rewrite_prompt}],
61
- max_tokens=100,
62
- temperature=0.1,
63
- )
64
- rewritten = response.choices[0].message.content.strip()
65
- logger.info("ุชู…ุช ุฅุนุงุฏุฉ ุตูŠุงุบุฉ ุงู„ุณุคุงู„: %s", rewritten)
66
- return rewritten
 
 
 
 
67
  except Exception as exc:
68
  logger.warning("ูุดู„ ุฅุนุงุฏุฉ ุงู„ุตูŠุงุบุฉ: %s", exc)
69
 
70
- return question # fallback: ุงู„ุณุคุงู„ ุงู„ุฃุตู„ูŠ
 
1
  import json
2
+ import httpx
3
  from app.core.config import settings
4
  from app.core.logging_setup import get_logger
5
 
6
  logger = get_logger(__name__)
7
 
8
+ _OLLAMA_TIMEOUT = httpx.Timeout(settings.ollama_timeout)
9
 
10
 
11
  def is_followup_question(question: str) -> bool:
 
 
 
 
 
 
 
 
 
 
 
12
  followup_keywords = [
13
  "ู„ู…ุงุฐุง", "ูƒูŠู", "ู…ุงุฐุง", "ูˆุถุญ", "ุงุดุฑุญ", "ูŠุนู†ูŠ", "ุทูŠุจ", "ูˆุฅูŠู‡",
14
  "why", "how", "what do you mean", "explain", "elaborate",
 
26
 
27
 
28
  async def rewrite_query(question: str, history: list[dict]) -> str:
 
 
 
 
 
29
  if not history:
30
  return question
31
 
 
39
  )
40
 
41
  try:
42
+ async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as client:
43
+ response = await client.post(
44
+ settings.ollama_url,
45
+ json={
46
+ "model": settings.ollama_model,
47
+ "messages": [{"role": "user", "content": rewrite_prompt}],
48
+ "stream": False,
49
+ },
50
+ )
51
+ if response.status_code == 200:
52
+ rewritten = response.json()["message"]["content"].strip()
53
+ logger.info("ุชู…ุช ุฅุนุงุฏุฉ ุตูŠุงุบุฉ ุงู„ุณุคุงู„: %s", rewritten)
54
+ return rewritten
55
  except Exception as exc:
56
  logger.warning("ูุดู„ ุฅุนุงุฏุฉ ุงู„ุตูŠุงุบุฉ: %s", exc)
57
 
58
+ return question # fallback: ุงู„ุณุคุงู„ ุงู„ุฃุตู„ูŠ
app/retrieval/__init__.py CHANGED
@@ -1,2 +1,2 @@
1
  from app.retrieval.retriever import get_retriever, reset_retriever
2
- from app.retrieval.reranker import rerank_chunks, warmup_reranker
 
1
  from app.retrieval.retriever import get_retriever, reset_retriever
2
+ # from app.retrieval.reranker import rerank_chunks # DISABLED: using Groq API reranker
app/retrieval/retriever.py CHANGED
@@ -1,20 +1,3 @@
1
- """
2
- retriever.py — Hybrid retriever with weighted RRF fusion (CPU-only production)
3
- ==============================================================================
4
- - CPU-only embedding (GPU reserved for Ollama LLM)
5
- - Arabic-aware BM25 tokenizer (diacritics, prefix stripping, alef normalization)
6
- - BM25 index persisted via joblib โ€” skips rebuild if collection unchanged
7
- - top_k hard-capped at 8 (raised from 5) to give LLM enough rows to
8
- reconstruct multi-row academic tables without OOM risk on CPU
9
- - fetch_k = top_k × 4 — wide candidate pool for fragmented tables
10
- - Weighted RRF: structural queries (level/dept/course) get 2× vector weight
11
- and 0.5× BM25 weight to suppress noise from ubiquitous terms like "ساعة"
12
- - BM25 score threshold: skip BM25 results when max raw score < 0.1
13
- (query has no meaningful keyword match — prevents random noise from
14
- contaminating the fusion ranking)
15
- - reset_retriever() holds _init_lock to prevent concurrent partial-reset reads
16
- """
17
-
18
  import re
19
  import time
20
  import joblib
@@ -193,16 +176,6 @@ def _is_structural_query(query: str) -> bool:
193
 
194
 
195
  def _build_metadata_filter(query: str) -> dict | None:
196
- """Build a ChromaDB `where` filter from the query's level/semester mentions.
197
-
198
- When a user asks about "المستوى الأول الفصل الثاني", this returns a filter
199
- that restricts vector search to chunks whose `level_number` = "1" AND
200
- `semester` = "الثاني". This prevents Level-2/3/4 chunks (which may be
201
- semantically closer due to course-code overlap) from outranking the
202
- actually-requested Level-1 chunks.
203
-
204
- Returns None if no level/semester can be extracted (general query).
205
- """
206
  level = _extract_level_number(query)
207
  semester = _extract_semester(query)
208
 
@@ -221,7 +194,11 @@ def _build_metadata_filter(query: str) -> dict | None:
221
 
222
 
223
  def _select_device() -> str:
224
- return "cpu" # embeddings run on CPU; LLM is cloud-based (Groq)
 
 
 
 
225
 
226
 
227
  # โ”€โ”€ Retriever โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
@@ -229,8 +206,8 @@ def _select_device() -> str:
229
  class Retriever:
230
  def __init__(self):
231
  device = _select_device()
232
- logger.info("[INIT] Embedding device: {device}")
233
- self.embed_model = SentenceTransformer(settings.embed_model, ...)
234
  self.client = chromadb.PersistentClient(path=settings.chroma_path)
235
  self.collection = self.client.get_or_create_collection(name=settings.chroma_collection)
236
 
@@ -255,7 +232,7 @@ class Retriever:
255
  print(f"[CACHE] BM25 loaded ({len(self.documents)} docs)")
256
  return
257
  except Exception as e:
258
- logger.warning("[WARN] BM25 cache invalid: {e}")
259
 
260
  print("[BUILD] Building BM25 index...")
261
  all_docs = self.collection.get()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import re
2
  import time
3
  import joblib
 
176
 
177
 
178
  def _build_metadata_filter(query: str) -> dict | None:
 
 
 
 
 
 
 
 
 
 
179
  level = _extract_level_number(query)
180
  semester = _extract_semester(query)
181
 
 
194
 
195
 
196
  def _select_device() -> str:
197
+ try:
198
+ import torch
199
+ return "cuda" if torch.cuda.is_available() else "cpu"
200
+ except ImportError:
201
+ return "cpu"
202
 
203
 
204
  # โ”€โ”€ Retriever โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
206
  class Retriever:
207
  def __init__(self):
208
  device = _select_device()
209
+ logger.info("[INIT] Embedding device: %s", device)
210
+ self.embed_model = SentenceTransformer(settings.embed_model, device=device)
211
  self.client = chromadb.PersistentClient(path=settings.chroma_path)
212
  self.collection = self.client.get_or_create_collection(name=settings.chroma_collection)
213
 
 
232
  print(f"[CACHE] BM25 loaded ({len(self.documents)} docs)")
233
  return
234
  except Exception as e:
235
+ logger.warning("[WARN] BM25 cache invalid: %s", e)
236
 
237
  print("[BUILD] Building BM25 index...")
238
  all_docs = self.collection.get()
main.py CHANGED
@@ -1,17 +1,6 @@
1
- """
2
- main.py — نقطة الدخول الوحيدة للتطبيق.
3
- =========================================
4
- هذا الملف مسؤول عن شيء واحد فقط:
5
- تجميع كل أجزاء التطبيق وتشغيله.
6
-
7
- لا يحتوي على أي منطق.
8
- أي منطق يجب أن يكون في app/
9
- """
10
-
11
  import os
12
  import sys
13
 
14
- # إجبار UTF-8 على Windows
15
  if sys.stdout.encoding != "utf-8":
16
  sys.stdout.reconfigure(encoding="utf-8", errors="replace")
17
  if sys.stderr.encoding != "utf-8":
@@ -29,26 +18,21 @@ from app.core.logging_setup import setup_logging, get_logger
29
  from app.api.routes_chat import router as chat_router
30
  from app.api.routes_health import router as health_router
31
 
32
- # ุฅุนุฏุงุฏ ุงู„ู€ logging ููˆุฑุงู‹
33
  setup_logging()
34
  logger = get_logger("startup")
35
 
36
 
37
  @asynccontextmanager
38
  async def lifespan(app: FastAPI):
39
- """
40
- Startup/Shutdown hooks.
41
- ูŠูู†ููŽู‘ุฐ ุนู†ุฏ ุจุฏุก ุงู„ุชุดุบูŠู„ุŒ ูˆ yield ูŠุนู†ูŠ "ุงู„ุชุทุจูŠู‚ ูŠุนู…ู„ ุงู„ุขู†".
42
- """
43
  from pathlib import Path
44
- from app.retrieval import get_retriever, reset_retriever, warmup_reranker
 
45
  from app.llm.groq_client import warmup_model
 
46
 
47
- # ุฃู†ุดุฆ ุงู„ู…ุฌู„ุฏุงุช ุงู„ู„ุงุฒู…ุฉ
48
  Path(settings.data_dir).mkdir(parents=True, exist_ok=True)
49
  Path("data/pdfs").mkdir(parents=True, exist_ok=True)
50
 
51
- # ุชุญู‚ู‚ ู…ู† ุงู„ู€ vectorstore โ€” ุฃุนุฏ ุงู„ุงุณุชูŠุนุงุจ ุชู„ู‚ุงุฆูŠุงู‹ ุฅุฐุง ูƒุงู† ูุงุฑุบุงู‹
52
  guide_path = Path(settings.data_dir) / "guide.md"
53
  if guide_path.exists():
54
  retriever = get_retriever()
@@ -56,30 +40,20 @@ async def lifespan(app: FastAPI):
56
  logger.info("ู‚ุงุนุฏุฉ ุงู„ุจูŠุงู†ุงุช ุงู„ู…ุชุฌู‡ูŠุฉ ูุงุฑุบุฉ โ€” ุจุฏุก ุงู„ุงุณุชูŠุนุงุจ ุงู„ุชู„ู‚ุงุฆูŠ...")
57
  from app.ingestion import ingest_all_markdown
58
  ingest_all_markdown(settings.data_dir)
59
- # โ”€โ”€ ุฅุนุงุฏุฉ ุจู†ุงุก ุงู„ู€ Retriever ุจุนุฏ ุงู„ุงุณุชูŠุนุงุจ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
60
- # ุงู„ู€ singleton ุงู„ู‚ุฏูŠู… ุฃูู†ุดุฆ ูˆุงู„ู…ุฌู…ูˆุนุฉ ูุงุฑุบุฉุŒ ู„ุฐุง documents=[]
61
- # ูˆ bm25=None. ูŠุฌุจ ุฅุนุงุฏุฉ ุฅู†ุดุงุฆู‡ ู„ูŠู‚ุฑุฃ ุงู„ุจูŠุงู†ุงุช ุงู„ุฌุฏูŠุฏุฉ.
62
- reset_retriever()
63
- logger.info("ุชู… ุฅุนุงุฏุฉ ุชู‡ูŠุฆุฉ ุงู„ู€ Retriever ุจุนุฏ ุงู„ุงุณุชูŠุนุงุจ")
64
 
65
- # ุชุณุฎูŠู† ู†ู…ูˆุฐุฌ ุงู„ุชุถู…ูŠู†
66
  retriever = get_retriever()
67
- logger.info("ุนุฏุฏ ุงู„ู€ chunks ููŠ ุงู„ู…ุฌู…ูˆุนุฉ: %d", retriever.collection.count())
68
  retriever.embed_model.encode(["warm up"], normalize_embeddings=True)
69
  logger.info("ุชู… ุชุณุฎูŠู† ู†ู…ูˆุฐุฌ ุงู„ุชุถู…ูŠู†")
70
 
71
- # ุชุญู…ูŠู„ Groq ู…ุณุจู‚ุงู‹
72
  await warmup_model()
73
-
74
- # ุชุญู‚ู‚ ู…ู† ุงู„ู€ Reranker
75
  await warmup_reranker()
76
 
77
- logger.info("โœ… ุงู„ุชุทุจูŠู‚ ุฌุงู‡ุฒ โ€” %s", settings.groq_model)
 
78
  yield
79
  logger.info("ุงู„ุชุทุจูŠู‚ ูŠูุบู„ู‚...")
80
 
81
 
82
- # โ”€โ”€ ุฅู†ุดุงุก ุงู„ุชุทุจูŠู‚ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
83
  app = FastAPI(
84
  title="ASU RAG Chatbot",
85
  version="3.0.0",
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import sys
3
 
 
4
  if sys.stdout.encoding != "utf-8":
5
  sys.stdout.reconfigure(encoding="utf-8", errors="replace")
6
  if sys.stderr.encoding != "utf-8":
 
18
  from app.api.routes_chat import router as chat_router
19
  from app.api.routes_health import router as health_router
20
 
 
21
  setup_logging()
22
  logger = get_logger("startup")
23
 
24
 
25
  @asynccontextmanager
26
  async def lifespan(app: FastAPI):
 
 
 
 
27
  from pathlib import Path
28
+ from app.retrieval import get_retriever
29
+ # from app.llm.ollama_client import warmup_model
30
  from app.llm.groq_client import warmup_model
31
+ from app.retrieval.reranker import warmup_reranker
32
 
 
33
  Path(settings.data_dir).mkdir(parents=True, exist_ok=True)
34
  Path("data/pdfs").mkdir(parents=True, exist_ok=True)
35
 
 
36
  guide_path = Path(settings.data_dir) / "guide.md"
37
  if guide_path.exists():
38
  retriever = get_retriever()
 
40
  logger.info("ู‚ุงุนุฏุฉ ุงู„ุจูŠุงู†ุงุช ุงู„ู…ุชุฌู‡ูŠุฉ ูุงุฑุบุฉ โ€” ุจุฏุก ุงู„ุงุณุชูŠุนุงุจ ุงู„ุชู„ู‚ุงุฆูŠ...")
41
  from app.ingestion import ingest_all_markdown
42
  ingest_all_markdown(settings.data_dir)
 
 
 
 
 
43
 
 
44
  retriever = get_retriever()
 
45
  retriever.embed_model.encode(["warm up"], normalize_embeddings=True)
46
  logger.info("ุชู… ุชุณุฎูŠู† ู†ู…ูˆุฐุฌ ุงู„ุชุถู…ูŠู†")
47
 
 
48
  await warmup_model()
 
 
49
  await warmup_reranker()
50
 
51
+ # logger.info("ุงู„ุชุทุจูŠู‚ ุฌุงู‡ุฒ โ€” %s", settings.ollama_model)
52
+ logger.info("ุงู„ุชุทุจูŠู‚ ุฌุงู‡ุฒ โ€” %s", settings.groq_model)
53
  yield
54
  logger.info("ุงู„ุชุทุจูŠู‚ ูŠูุบู„ู‚...")
55
 
56
 
 
57
  app = FastAPI(
58
  title="ASU RAG Chatbot",
59
  version="3.0.0",
requirements.txt CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  fastapi==0.111.0
2
  uvicorn[standard]==0.29.0
3
  python-multipart==0.0.9
@@ -7,12 +10,12 @@ sentence-transformers==3.0.1
7
 
8
  # Vector DB (must match vectorstore format)
9
  chromadb==0.5.3
10
- posthog==3.0.2
11
 
12
- # Groq LLM client
13
  groq==0.9.0
14
-
15
- # Reranker HTTP client (HuggingFace Inference API)
 
16
  httpx==0.27.0
17
 
18
  # NLP utilities
@@ -23,7 +26,6 @@ rank-bm25==0.2.2
23
  # Data stack (CRITICAL pins)
24
  numpy==1.26.4
25
  scikit-learn==1.4.2
26
- pandas==2.2.2
27
  joblib>=1.3.0
28
 
29
  # Optional but stabilizes HF stack
@@ -35,6 +37,9 @@ python-dotenv==1.0.1
35
  pydantic==2.7.1
36
  pydantic-settings==2.2.1
37
 
38
- # Torch CPU build (HF Spaces compatible)
39
- --extra-index-url https://download.pytorch.org/whl/cpu
40
- torch==2.3.1+cpu
 
 
 
 
1
+ # PyTorch index — CUDA 12.1 (GPU). For CPU: change cu121 to cpu here and in the torch pin at the bottom of this file
2
+ --extra-index-url https://download.pytorch.org/whl/cu121
3
+
4
  fastapi==0.111.0
5
  uvicorn[standard]==0.29.0
6
  python-multipart==0.0.9
 
10
 
11
  # Vector DB (must match vectorstore format)
12
  chromadb==0.5.3
 
13
 
14
+ # LLM + Reranker HTTP client
15
  groq==0.9.0
16
+ # httpx is used for:
17
+ # 1. Streaming Ollama responses
18
+ # 2. Reranker API calls (currently Groq — HuggingFace disabled for now)
19
  httpx==0.27.0
20
 
21
  # NLP utilities
 
26
  # Data stack (CRITICAL pins)
27
  numpy==1.26.4
28
  scikit-learn==1.4.2
 
29
  joblib>=1.3.0
30
 
31
  # Optional but stabilizes HF stack
 
37
  pydantic==2.7.1
38
  pydantic-settings==2.2.1
39
 
40
+ # Torch — CUDA 12.1 build for RTX 3050 GPU acceleration (default)
41
+ # To switch to CPU instead:
42
+ # Step 1: Comment out the --extra-index-url line at the top of this file
43
+ # Step 2: Replace the line below with: torch==2.3.1+cpu
44
+ # Step 3: Run: pip install torch==2.3.1+cpu --extra-index-url https://download.pytorch.org/whl/cpu
45
+ torch==2.3.1+cu121
setup.sh CHANGED
@@ -6,34 +6,41 @@ set -e
6
 
7
  echo ""
8
  echo "โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—"
9
- echo "โ•‘ Arabic RAG Chatbot โ€” Setup Script โ•‘"
10
  echo "โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"
11
  echo ""
12
 
13
  # โ”€โ”€ 1. Python virtual environment โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
14
  if [ ! -d ".venv" ]; then
15
- echo "๐Ÿ“ฆ Creating Python virtual environment..."
16
  python -m venv .venv
17
  fi
18
  source .venv/bin/activate
19
 
20
- echo "๐Ÿ“ฆ Installing Python dependencies..."
21
  pip install --upgrade pip -q
22
  pip install -r requirements.txt -q
23
- echo "โœ… Python dependencies installed"
 
 
 
 
 
 
 
24
 
25
  # โ”€โ”€ 2. Ollama check โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
26
  echo ""
27
- echo "๐Ÿ” Checking Ollama..."
28
  if ! command -v ollama &> /dev/null; then
29
- echo "โŒ Ollama not found. Install it from: https://ollama.com/download"
30
- echo " Then run: ollama pull gemma3"
31
  exit 1
32
  fi
33
 
34
  # Check if Ollama is running
35
  if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
36
- echo "๐Ÿš€ Starting Ollama server in background..."
37
  ollama serve &
38
  sleep 3
39
  fi
@@ -42,38 +49,34 @@ echo "โœ… Ollama is running"
42
 
43
  # โ”€โ”€ 3. Pull LLM model if needed โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
44
  echo ""
45
- echo "๐Ÿ“ฅ Checking for gemma3 model..."
46
  if ! ollama list | grep -q "gemma3"; then
47
- echo "๐Ÿ“ฅ Pulling gemma3 (this downloads ~4 GB once)..."
48
  ollama pull gemma3
49
  else
50
- echo "โœ… gemma3 already available"
51
  fi
52
 
53
  # โ”€โ”€ 4. Prepare Markdown knowledge base โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
54
  echo ""
55
- echo "๐Ÿ“ Preparing data/markdown/ directory..."
56
  mkdir -p data/markdown
57
 
58
  MD_FILES=$(find data/markdown -name "*.md" 2>/dev/null | wc -l)
59
  if [ "$MD_FILES" -gt 0 ]; then
60
- echo "๐Ÿ“„ Found $MD_FILES Markdown file(s). Ingesting..."
61
- python ingest_markdown.py
62
- echo "โœ… Knowledge base ready"
63
  else
64
- echo "โš ๏ธ No Markdown files found in data/markdown/"
65
- echo " Place your .md files there and either:"
66
- echo " โ€ข Run: python ingest_markdown.py"
67
- echo " โ€ข Or upload via the web UI at http://localhost:8000"
68
  fi
69
 
70
  # โ”€โ”€ 5. Start FastAPI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
71
  echo ""
72
- echo "๐Ÿš€ Starting FastAPI server..."
73
- echo " UI: http://localhost:8000"
74
- echo " API docs: http://localhost:8000/docs"
75
- echo " Health: http://localhost:8000/health"
76
- echo " Press Ctrl+C to stop"
77
  echo ""
78
 
79
  uvicorn main:app --host 0.0.0.0 --port 8000 --reload
 
6
 
7
  echo ""
8
  echo "โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—"
9
+ echo "โ•‘ ASU RAG Chatbot โ€” Setup Script โ•‘"
10
  echo "โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•"
11
  echo ""
12
 
13
  # โ”€โ”€ 1. Python virtual environment โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
14
  if [ ! -d ".venv" ]; then
15
+ echo "Creating Python virtual environment..."
16
  python -m venv .venv
17
  fi
18
  source .venv/bin/activate
19
 
20
+ echo "Installing Python dependencies..."
21
  pip install --upgrade pip -q
22
  pip install -r requirements.txt -q
23
+ echo "Python dependencies installed"
24
+
25
+ # โ”€โ”€ .env setup โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
26
+ if [ ! -f ".env" ]; then
27
+ echo "Creating .env from template..."
28
+ cp .env.example .env
29
+ echo ".env created โ€” edit it if needed"
30
+ fi
31
 
32
  # โ”€โ”€ 2. Ollama check โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
33
  echo ""
34
+ echo "Checking Ollama..."
35
  if ! command -v ollama &> /dev/null; then
36
+ echo "Ollama not found. Install it from: https://ollama.com/download"
37
+ echo "Then run: ollama pull gemma3"
38
  exit 1
39
  fi
40
 
41
  # Check if Ollama is running
42
  if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
43
+ echo "Starting Ollama server in background..."
44
  ollama serve &
45
  sleep 3
46
  fi
 
49
 
50
  # โ”€โ”€ 3. Pull LLM model if needed โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
51
  echo ""
52
+ echo "Checking for gemma3 model..."
53
  if ! ollama list | grep -q "gemma3"; then
54
+ echo "Pulling gemma3 (this downloads ~4 GB once)..."
55
  ollama pull gemma3
56
  else
57
+ echo "gemma3 already available"
58
  fi
59
 
60
  # โ”€โ”€ 4. Prepare Markdown knowledge base โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
61
  echo ""
62
+ echo "Preparing data/markdown/ directory..."
63
  mkdir -p data/markdown
64
 
65
  MD_FILES=$(find data/markdown -name "*.md" 2>/dev/null | wc -l)
66
  if [ "$MD_FILES" -gt 0 ]; then
67
+ echo "Found $MD_FILES Markdown file(s). Ingestion will run automatically on server startup."
68
+
 
69
  else
70
+ echo "No Markdown files found in data/markdown/"
71
+ echo "โ€ข Place your .md files then restart the server โ€” ingestion runs automatically"
 
 
72
  fi
73
 
74
  # โ”€โ”€ 5. Start FastAPI โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
75
  echo ""
76
+ echo "Starting FastAPI server..."
77
+ echo "API docs: http://localhost:8000/docs"
78
+ echo "Health: http://localhost:8000/health"
79
+ echo "Press Ctrl+C to stop"
 
80
  echo ""
81
 
82
  uvicorn main:app --host 0.0.0.0 --port 8000 --reload
test_reranker.py DELETED
@@ -1,28 +0,0 @@
1
- import asyncio
2
- import logging
3
- from app.retrieval.reranker import rerank_chunks, warmup_reranker
4
-
5
- logging.basicConfig(level=logging.INFO)
6
-
7
- async def test():
8
- print("Testing warmup...")
9
- ok = await warmup_reranker()
10
- print("Warmup OK:", ok)
11
-
12
- chunks = [
13
- {'text': 'ุชุชูƒูˆู† ูƒู„ูŠุฉ ุงู„ุนู„ูˆู… ู…ู† ุงู‚ุณุงู… ุงู„ุฑูŠุงุถูŠุงุช ูˆุงู„ููŠุฒูŠุงุก ูˆุงู„ูƒูŠู…ูŠุงุก', 'source': 'guide.md', 'rrf_score': 0.5, 'metadata': {}},
14
- {'text': 'ูŠุฌุจ ุนู„ู‰ ุงู„ุทุงู„ุจ ุงุฌุชูŠุงุฒ 140 ุณุงุนุฉ ู…ุนุชู…ุฏุฉ', 'source': 'guide.md', 'rrf_score': 0.4, 'metadata': {}},
15
- {'text': 'ุงู„ุทู‚ุณ ุฌู…ูŠู„ ุงู„ูŠูˆู…', 'source': 'x.md', 'rrf_score': 0.3, 'metadata': {}},
16
- {'text': 'ู‚ุณู… ุงู„ุฑูŠุงุถูŠุงุช ูŠุถู… ุชุฎุตุตุงุช ุนุฏูŠุฏุฉ', 'source': 'guide.md', 'rrf_score': 0.2, 'metadata': {}},
17
- {'text': 'ู…ูˆุงุนูŠุฏ ุงู„ุชุณุฌูŠู„ ููŠ ุงู„ูุตู„ ุงู„ุงูˆู„', 'source': 'guide.md', 'rrf_score': 0.1, 'metadata': {}},
18
- {'text': 'ูƒู„ูŠุฉ ุงู„ุนู„ูˆู… ุฌุงู…ุนุฉ ุนูŠู† ุดู…ุณ ุชุฃุณุณุช ุนุงู… 1950', 'source': 'guide.md', 'rrf_score': 0.05, 'metadata': {}}
19
- ]
20
-
21
- print("\nTesting reranking...")
22
- result = await rerank_chunks('ู…ุง ู‡ูŠ ุงู‚ุณุงู… ูƒู„ูŠุฉ ุงู„ุนู„ูˆู…ุŸ', chunks, top_k=3)
23
- print("\nResults:")
24
- for r in result:
25
- print(f"{r['rerank_score']:.4f} | {r['text'][:60]}")
26
-
27
- if __name__ == "__main__":
28
- asyncio.run(test())