Spaces:
Running
Running
Commit ·
4c09ca7
1
Parent(s): f2c87c6
add API key protection
Browse files- .env.example +14 -4
- README.md +0 -31
- app/__init__.py +0 -0
- app/api/auth.py +0 -16
- app/api/routes_chat.py +2 -1
- app/api/routes_health.py +23 -9
- app/core/config.py +12 -2
- app/llm/ollama_client.py +1 -24
- app/memory/conversation_memory.py +4 -15
- app/pipeline/chat_pipeline.py +6 -14
- app/pipeline/context_builder.py +1 -13
- app/pipeline/prompt_builder.py +0 -8
- app/pipeline/query_handler.py +16 -28
- app/retrieval/__init__.py +1 -1
- app/retrieval/retriever.py +8 -31
- main.py +5 -31
- requirements.txt +13 -8
- setup.sh +27 -24
- test_reranker.py +0 -28
.env.example
CHANGED
|
@@ -1,7 +1,9 @@
|
|
| 1 |
-
# โโ
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
| 3 |
GROQ_API_KEY=gsk_your_groq_api_key_here
|
| 4 |
-
# النماذج المتاحة: llama-3.3-70b-versatile, llama-3.1-8b-instant, mixtral-8x7b-32768
|
| 5 |
GROQ_MODEL=llama-3.3-70b-versatile
|
| 6 |
|
| 7 |
# โโ ุงูุงุณุชุฑุฌุงุน โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
@@ -11,6 +13,12 @@ CHROMA_COLLECTION=rag_docs
|
|
| 11 |
TOP_K=8
|
| 12 |
MAX_CONTEXT_CHARS=10000
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# โโ ุฐุงูุฑุฉ ุงูู
ุญุงุฏุซุฉ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 15 |
MAX_TURNS=6
|
| 16 |
MAX_SESSIONS=200
|
|
@@ -22,4 +30,6 @@ CHUNK_SIZE=1600
|
|
| 22 |
CHUNK_OVERLAP=200
|
| 23 |
|
| 24 |
# โโ ุงูุดุจูุฉ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 25 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
# โโ ูู
ูุฐุฌ ุงููุบุฉ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 2 |
+
OLLAMA_MODEL=gemma3
|
| 3 |
+
OLLAMA_URL=http://127.0.0.1:11434/api/chat
|
| 4 |
+
|
| 5 |
+
# ── Groq LLM (Reranker & Chat) ────────────────────────────────────────────────
|
| 6 |
GROQ_API_KEY=gsk_your_groq_api_key_here
|
|
|
|
| 7 |
GROQ_MODEL=llama-3.3-70b-versatile
|
| 8 |
|
| 9 |
# โโ ุงูุงุณุชุฑุฌุงุน โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 13 |
TOP_K=8
|
| 14 |
MAX_CONTEXT_CHARS=10000
|
| 15 |
|
| 16 |
+
# ── Reranker (HuggingFace) ────────────────────────────────────────────────────
|
| 17 |
+
# احصل على token مجاني من: https://huggingface.co/settings/tokens
|
| 18 |
+
# HF_API_TOKEN=hf_your_token_here
|
| 19 |
+
# RERANKER_MODEL=Qwen/Qwen3-Reranker-0.6B
|
| 20 |
+
# RERANKER_CONCURRENCY=4
|
| 21 |
+
|
| 22 |
# โโ ุฐุงูุฑุฉ ุงูู
ุญุงุฏุซุฉ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 23 |
MAX_TURNS=6
|
| 24 |
MAX_SESSIONS=200
|
|
|
|
| 30 |
CHUNK_OVERLAP=200
|
| 31 |
|
| 32 |
# โโ ุงูุดุจูุฉ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 33 |
+
# في التطوير: *
|
| 34 |
+
# في الإنتاج: ضع عنوان تطبيق Flutter أو رابط الخادم
|
| 35 |
+
ALLOWED_ORIGINS=*
|
README.md
DELETED
|
@@ -1,31 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: ASU RAG Chatbot
|
| 3 |
-
emoji: ๐
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: green
|
| 6 |
-
sdk: docker
|
| 7 |
-
app_port: 7860
|
| 8 |
-
pinned: false
|
| 9 |
-
---
|
| 10 |
-
|
| 11 |
-
# ASU RAG Chatbot
|
| 12 |
-
|
| 13 |
-
مساعد أكاديمي ذكي لطلاب كلية العلوم - جامعة عين شمس
|
| 14 |
-
|
| 15 |
-
## Setup
|
| 16 |
-
|
| 17 |
-
Set the following secrets in your HuggingFace Space settings:
|
| 18 |
-
|
| 19 |
-
| Secret | Description |
|
| 20 |
-
|--------|-------------|
|
| 21 |
-
| `GROQ_API_KEY` | Get free at [console.groq.com](https://console.groq.com/keys) |
|
| 22 |
-
| `HF_API_TOKEN` | Get free at [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) |
|
| 23 |
-
|
| 24 |
-
## API
|
| 25 |
-
|
| 26 |
-
| Endpoint | Description |
|
| 27 |
-
|----------|-------------|
|
| 28 |
-
| `POST /chat` | Streaming chat (SSE) |
|
| 29 |
-
| `GET /health` | Health check |
|
| 30 |
-
| `POST /retrieve` | Debug: raw retrieval results |
|
| 31 |
-
| `DELETE /session/{id}` | Clear conversation history |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/__init__.py
DELETED
|
File without changes
|
app/api/auth.py
DELETED
|
@@ -1,16 +0,0 @@
|
|
| 1 |
-
from fastapi import Header, HTTPException, status
|
| 2 |
-
from app.core.config import settings
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
async def require_api_key(x_api_key: str = Header(default="")):
|
| 6 |
-
"""
|
| 7 |
-
ุชุญูู ู
ู ู
ูุชุงุญ ุงูู API ูู header ูู ุทูุจ.
|
| 8 |
-
ุฅุฐุง ูุงู API_SECRET_KEY ูุงุฑุบุงู ูู ุงูู env โ ุงูุญู
ุงูุฉ ู
ุนุทููุฉ (ููุชุทููุฑ).
|
| 9 |
-
"""
|
| 10 |
-
if not settings.api_secret_key:
|
| 11 |
-
return # no key configured โ open (local dev only)
|
| 12 |
-
if x_api_key != settings.api_secret_key:
|
| 13 |
-
raise HTTPException(
|
| 14 |
-
status_code=status.HTTP_401_UNAUTHORIZED,
|
| 15 |
-
detail="Invalid or missing API key",
|
| 16 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/api/routes_chat.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
import uuid
|
| 2 |
import json
|
| 3 |
import time
|
| 4 |
-
from fastapi import APIRouter
|
| 5 |
from fastapi.responses import StreamingResponse
|
| 6 |
from pydantic import BaseModel, Field
|
| 7 |
|
| 8 |
from app.memory import memory
|
| 9 |
from app.pipeline import chat_pipeline
|
|
|
|
| 10 |
from app.llm.groq_client import stream_response
|
| 11 |
from app.core.logging_setup import get_logger
|
| 12 |
|
|
|
|
| 1 |
import uuid
|
| 2 |
import json
|
| 3 |
import time
|
| 4 |
+
from fastapi import APIRouter
|
| 5 |
from fastapi.responses import StreamingResponse
|
| 6 |
from pydantic import BaseModel, Field
|
| 7 |
|
| 8 |
from app.memory import memory
|
| 9 |
from app.pipeline import chat_pipeline
|
| 10 |
+
# from app.llm.ollama_client import stream_response
|
| 11 |
from app.llm.groq_client import stream_response
|
| 12 |
from app.core.logging_setup import get_logger
|
| 13 |
|
app/api/routes_health.py
CHANGED
|
@@ -1,13 +1,6 @@
|
|
| 1 |
-
"""
|
| 2 |
-
app/api/routes_health.py
|
| 3 |
-
=========================
|
| 4 |
-
Endpoints ููู
ุฑุงูุจุฉ ูุงูุชุดุฎูุต.
|
| 5 |
-
|
| 6 |
-
/health โ ูู ุงูู server ูุนู
ูุ ูู Groq ู
ุชุตูุ
|
| 7 |
-
/retrieve โ ุฃุฏุงุฉ debug ููุชุญูู ู
ู ุฌูุฏุฉ ุงูุงุณุชุฑุฌุงุน
|
| 8 |
-
"""
|
| 9 |
|
| 10 |
from groq import AsyncGroq
|
|
|
|
| 11 |
from fastapi import APIRouter
|
| 12 |
from pydantic import BaseModel
|
| 13 |
|
|
@@ -18,12 +11,30 @@ from app.memory import memory
|
|
| 18 |
|
| 19 |
router = APIRouter()
|
| 20 |
|
|
|
|
| 21 |
_groq_client = AsyncGroq(api_key=settings.groq_api_key)
|
| 22 |
|
| 23 |
|
| 24 |
@router.get("/health")
|
| 25 |
async def health():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
retriever = get_retriever()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
groq_ok = False
|
| 28 |
try:
|
| 29 |
await _groq_client.chat.completions.create(
|
|
@@ -36,6 +47,9 @@ async def health():
|
|
| 36 |
pass
|
| 37 |
|
| 38 |
return {
|
|
|
|
|
|
|
|
|
|
| 39 |
"status": "ok" if groq_ok else "error",
|
| 40 |
"groq_connected": groq_ok,
|
| 41 |
"model": settings.groq_model,
|
|
@@ -56,4 +70,4 @@ async def retrieve(req: RetrieveRequest):
|
|
| 56 |
retriever = get_retriever()
|
| 57 |
k = req.top_k or settings.top_k
|
| 58 |
chunks = retriever.search(req.question, top_k=k)
|
| 59 |
-
return {"chunks": chunks}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
from groq import AsyncGroq
|
| 3 |
+
# import httpx
|
| 4 |
from fastapi import APIRouter
|
| 5 |
from pydantic import BaseModel
|
| 6 |
|
|
|
|
| 11 |
|
| 12 |
router = APIRouter()
|
| 13 |
|
| 14 |
+
# _OLLAMA_BASE_URL = settings.ollama_url.split("/api/")[0]
|
| 15 |
_groq_client = AsyncGroq(api_key=settings.groq_api_key)
|
| 16 |
|
| 17 |
|
| 18 |
@router.get("/health")
|
| 19 |
async def health():
|
| 20 |
+
"""
|
| 21 |
+
ุชุญูู ู
ู ุญุงูุฉ ุงููุธุงู
.
|
| 22 |
+
|
| 23 |
+
ููุนูุฏ:
|
| 24 |
+
status: "ok" ุฃู "error"
|
| 25 |
+
ollama_connected: ูู Ollama ูุณุชุฌูุจุ
|
| 26 |
+
chunks_indexed: ุนุฏุฏ chunks ูู ูุงุนุฏุฉ ุงูุจูุงูุงุช
|
| 27 |
+
sessions_active: ุนุฏุฏ ุงูุฌูุณุงุช ุงููุดุทุฉ ูู ุงูุฐุงูุฑุฉ
|
| 28 |
+
"""
|
| 29 |
retriever = get_retriever()
|
| 30 |
+
# ollama_ok = False
|
| 31 |
+
# try:
|
| 32 |
+
# async with httpx.AsyncClient(timeout=httpx.Timeout(5.0)) as client:
|
| 33 |
+
# r = await client.get(f"{_OLLAMA_BASE_URL}/api/tags")
|
| 34 |
+
# ollama_ok = r.status_code == 200
|
| 35 |
+
# except (httpx.HTTPError, OSError):
|
| 36 |
+
# pass
|
| 37 |
+
|
| 38 |
groq_ok = False
|
| 39 |
try:
|
| 40 |
await _groq_client.chat.completions.create(
|
|
|
|
| 47 |
pass
|
| 48 |
|
| 49 |
return {
|
| 50 |
+
# "status": "ok" if ollama_ok else "error",
|
| 51 |
+
# "ollama_connected": ollama_ok,
|
| 52 |
+
# "model": settings.ollama_model,
|
| 53 |
"status": "ok" if groq_ok else "error",
|
| 54 |
"groq_connected": groq_ok,
|
| 55 |
"model": settings.groq_model,
|
|
|
|
| 70 |
retriever = get_retriever()
|
| 71 |
k = req.top_k or settings.top_k
|
| 72 |
chunks = retriever.search(req.question, top_k=k)
|
| 73 |
+
return {"chunks": chunks}
|
app/core/config.py
CHANGED
|
@@ -5,18 +5,26 @@ load_dotenv()
|
|
| 5 |
|
| 6 |
|
| 7 |
class Settings:
|
| 8 |
-
# โโ ูู
ูุฐุฌ ุงููุบุฉ (
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
groq_api_key: str = os.getenv("GROQ_API_KEY", "")
|
| 10 |
groq_model: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
| 11 |
|
| 12 |
# โโ ุงูุงุณุชุฑุฌุงุน ูุงูุชุถู
ูู โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 13 |
-
# ูู
ูุฐุฌ ุงูุชุถู
ูู โ ูุฌุจ ุฃู ูููู ููุณู ูู ุงูุงุณุชูุนุงุจ ูุงูุงุณุชุฑุฌุงุน ุฏุงุฆู
ุงู
|
| 14 |
embed_model: str = os.getenv("EMBED_MODEL", "paraphrase-multilingual-mpnet-base-v2")
|
| 15 |
chroma_path: str = os.getenv("CHROMA_PATH", "vectorstore")
|
| 16 |
chroma_collection: str = os.getenv("CHROMA_COLLECTION", "rag_docs")
|
| 17 |
top_k: int = min(int(os.getenv("TOP_K", "8")), 8)
|
| 18 |
max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "10000"))
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# โโ ุฐุงูุฑุฉ ุงูู
ุญุงุฏุซุฉ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 21 |
max_turns: int = int(os.getenv("MAX_TURNS", "6"))
|
| 22 |
max_sessions: int = int(os.getenv("MAX_SESSIONS", "200"))
|
|
@@ -36,6 +44,8 @@ class Settings:
|
|
| 36 |
]
|
| 37 |
|
| 38 |
# โโ Timeouts โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
|
|
|
| 39 |
keepalive_interval: int = 20 # ุซุงููุฉ โ heartbeat ููู SSE
|
| 40 |
|
| 41 |
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
class Settings:
|
| 8 |
+
# โโ ูู
ูุฐุฌ ุงููุบุฉ (Ollama) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 9 |
+
ollama_model: str = os.getenv("OLLAMA_MODEL", "gemma3")
|
| 10 |
+
ollama_url: str = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434/api/chat")
|
| 11 |
+
|
| 12 |
+
# โโ ูู
ูุฐุฌ ุงููุบุฉ (Groq) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 13 |
groq_api_key: str = os.getenv("GROQ_API_KEY", "")
|
| 14 |
groq_model: str = os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
|
| 15 |
|
| 16 |
# โโ ุงูุงุณุชุฑุฌุงุน ูุงูุชุถู
ูู โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 17 |
embed_model: str = os.getenv("EMBED_MODEL", "paraphrase-multilingual-mpnet-base-v2")
|
| 18 |
chroma_path: str = os.getenv("CHROMA_PATH", "vectorstore")
|
| 19 |
chroma_collection: str = os.getenv("CHROMA_COLLECTION", "rag_docs")
|
| 20 |
top_k: int = min(int(os.getenv("TOP_K", "8")), 8)
|
| 21 |
max_context_chars: int = int(os.getenv("MAX_CONTEXT_CHARS", "10000"))
|
| 22 |
|
| 23 |
+
# โโ Reranker (HuggingFace) โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 24 |
+
#hf_api_token: str = os.getenv("HF_API_TOKEN", "")
|
| 25 |
+
#reranker_model: str = os.getenv("RERANKER_MODEL", "Qwen/Qwen3-Reranker-0.6B")
|
| 26 |
+
#reranker_concurrency: int = int(os.getenv("RERANKER_CONCURRENCY", "4"))
|
| 27 |
+
|
| 28 |
# โโ ุฐุงูุฑุฉ ุงูู
ุญุงุฏุซุฉ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 29 |
max_turns: int = int(os.getenv("MAX_TURNS", "6"))
|
| 30 |
max_sessions: int = int(os.getenv("MAX_SESSIONS", "200"))
|
|
|
|
| 44 |
]
|
| 45 |
|
| 46 |
# โโ Timeouts โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 47 |
+
# None = ูุง timeout (ุงูุงุณุชูุชุงุฌ ุนูู CPU ูุฏ ูุฃุฎุฐ 200+ ุซุงููุฉ)
|
| 48 |
+
ollama_timeout = None
|
| 49 |
keepalive_interval: int = 20 # ุซุงููุฉ โ heartbeat ููู SSE
|
| 50 |
|
| 51 |
|
app/llm/ollama_client.py
CHANGED
|
@@ -16,25 +16,7 @@ async def stream_response(
|
|
| 16 |
session_id: str,
|
| 17 |
original_question: str,
|
| 18 |
):
|
| 19 |
-
|
| 20 |
-
ุจุซ ุงูุฑุฏ ู
ู Ollama ู
ุน ุญูุธ ุงูู
ุญุงุฏุซุฉ ูู ุงูุฐุงูุฑุฉ.
|
| 21 |
-
|
| 22 |
-
ููู ูุนู
ู ุงูู Streamingุ
|
| 23 |
-
โโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 24 |
-
ุจุฏูุงู ู
ู ุงูุงูุชุธุงุฑ ุญุชู ุชูุชู
ู ุงูุฅุฌุงุจุฉ ูุงู
ูุงู (ูุฏ ูุณุชุบุฑู 200 ุซุงููุฉ)ุ
|
| 25 |
-
ููุชุญ ุงุชุตุงูุงู ู
ุณุชู
ุฑุงู ูููุฑุณู ูู ููู
ุฉ ููุฑ ุฅูุชุงุฌูุง.
|
| 26 |
-
ุงูู
ุณุชุฎุฏู
ูุฑู ุงูุฑุฏ ูุธูุฑ ุชุฏุฑูุฌูุงู ูุฃู ุดุฎุตุงู ููุชุจ.
|
| 27 |
-
|
| 28 |
-
Heartbeat (ูุจุถุฉ ุงูููุจ):
|
| 29 |
-
โโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 30 |
-
ุฅุฐุง ูู
ูุฃุชู ุชููู ูู 20 ุซุงููุฉุ ููุฑุณู ู
ุณุงูุฉ ุตุบูุฑุฉ (zero-width space).
|
| 31 |
-
ูุฐุง ูู
ูุน ุงูู
ุชุตูุญ ุฃู ุงูุดุจูุฉ ู
ู ุงุนุชุจุงุฑ ุงูุงุชุตุงู "ู
ุงุช" ููุทุนู.
|
| 32 |
-
|
| 33 |
-
ุงูุญูุธ ูู ุงูุฐุงูุฑุฉ:
|
| 34 |
-
โโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 35 |
-
ุนูุฏ ุงูุชู
ุงู ุงูุฅุฌุงุจุฉุ ูุญูุธ ุงูุณุคุงู ูุงูุฅุฌุงุจุฉ ูู ConversationMemory
|
| 36 |
-
ุญุชู ูุนู
ู ุงูู follow-up ูู ุงูุฑุณุงุฆู ุงูุชุงููุฉ.
|
| 37 |
-
"""
|
| 38 |
full_answer = ""
|
| 39 |
stream_completed = False
|
| 40 |
logger.info("ุจุฏุก ุชูููุฏ ุงูุฅุฌุงุจุฉ | session=%s", session_id)
|
|
@@ -120,12 +102,7 @@ async def stream_response(
|
|
| 120 |
|
| 121 |
|
| 122 |
async def warmup_model() -> bool:
|
| 123 |
-
"""
|
| 124 |
-
ุญู
ูู ุงููู
ูุฐุฌ ูู ุฐุงูุฑุฉ Ollama ุนูุฏ ุจุฏุก ุงูุชุทุจูู.
|
| 125 |
-
ูู
ูุน ุงูุชุฃุฎูุฑ ุงููุจูุฑ ูู ุฃูู ุทูุจ.
|
| 126 |
|
| 127 |
-
ููุนุงุฏ True ุฅุฐุง ูุฌุญุ False ุฅุฐุง ูุดู.
|
| 128 |
-
"""
|
| 129 |
try:
|
| 130 |
async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
|
| 131 |
await client.post(
|
|
|
|
| 16 |
session_id: str,
|
| 17 |
original_question: str,
|
| 18 |
):
|
| 19 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
full_answer = ""
|
| 21 |
stream_completed = False
|
| 22 |
logger.info("ุจุฏุก ุชูููุฏ ุงูุฅุฌุงุจุฉ | session=%s", session_id)
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
async def warmup_model() -> bool:
|
|
|
|
|
|
|
|
|
|
| 105 |
|
|
|
|
|
|
|
| 106 |
try:
|
| 107 |
async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
|
| 108 |
await client.post(
|
app/memory/conversation_memory.py
CHANGED
|
@@ -1,20 +1,14 @@
|
|
| 1 |
-
"""
|
| 2 |
-
memory.py โ Production conversation store with TTL eviction.
|
| 3 |
-
Keeps the last N turns per session so "why?" follow-ups work correctly.
|
| 4 |
-
Evicts sessions older than TTL and caps total sessions to prevent OOM.
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
import time
|
| 8 |
import threading
|
| 9 |
from collections import OrderedDict, deque
|
| 10 |
from dataclasses import dataclass
|
| 11 |
from typing import Literal
|
| 12 |
-
import os
|
| 13 |
|
| 14 |
from app.core.config import settings
|
| 15 |
from app.core.logging_setup import get_logger
|
| 16 |
|
| 17 |
logger = get_logger(__name__)
|
|
|
|
| 18 |
@dataclass
|
| 19 |
class Turn:
|
| 20 |
role: Literal["user", "assistant"]
|
|
@@ -108,13 +102,8 @@ class ConversationMemory:
|
|
| 108 |
def session_count(self) -> int:
|
| 109 |
return len(self._sessions)
|
| 110 |
|
| 111 |
-
|
| 112 |
-
# Global singleton shared across all requests
|
| 113 |
-
MAX_TURNS = int(os.getenv("MAX_TURNS", "6"))
|
| 114 |
-
MAX_SESSIONS = int(os.getenv("MAX_SESSIONS", "200"))
|
| 115 |
-
SESSION_TTL = int(os.getenv("SESSION_TTL", "3600"))
|
| 116 |
memory = ConversationMemory(
|
| 117 |
-
max_turns=
|
| 118 |
-
max_sessions=
|
| 119 |
-
ttl_seconds=
|
| 120 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import time
|
| 2 |
import threading
|
| 3 |
from collections import OrderedDict, deque
|
| 4 |
from dataclasses import dataclass
|
| 5 |
from typing import Literal
|
|
|
|
| 6 |
|
| 7 |
from app.core.config import settings
|
| 8 |
from app.core.logging_setup import get_logger
|
| 9 |
|
| 10 |
logger = get_logger(__name__)
|
| 11 |
+
|
| 12 |
@dataclass
|
| 13 |
class Turn:
|
| 14 |
role: Literal["user", "assistant"]
|
|
|
|
| 102 |
def session_count(self) -> int:
|
| 103 |
return len(self._sessions)
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
memory = ConversationMemory(
|
| 106 |
+
max_turns=settings.max_turns,
|
| 107 |
+
max_sessions=settings.max_sessions,
|
| 108 |
+
ttl_seconds=settings.session_ttl,
|
| 109 |
)
|
app/pipeline/chat_pipeline.py
CHANGED
|
@@ -5,7 +5,9 @@ from langdetect import detect as detect_lang, LangDetectException
|
|
| 5 |
|
| 6 |
from app.core.config import settings
|
| 7 |
from app.core.logging_setup import get_logger
|
| 8 |
-
from app.retrieval import get_retriever
|
|
|
|
|
|
|
| 9 |
from app.pipeline.query_handler import is_followup_question, rewrite_query
|
| 10 |
from app.pipeline.context_builder import build_context, extract_sources
|
| 11 |
from app.pipeline.prompt_builder import build_system_prompt
|
|
@@ -37,17 +39,6 @@ async def run(
|
|
| 37 |
session_id: str,
|
| 38 |
history: list[dict],
|
| 39 |
) -> PipelineResult:
|
| 40 |
-
"""
|
| 41 |
-
ูููุฐ pipeline ูุงู
ู ูุณุคุงู ูุงุญุฏ.
|
| 42 |
-
|
| 43 |
-
ุงูู
ุฏุฎูุงุช:
|
| 44 |
-
question: ูุต ุงูุณุคุงู (ุจุนุฏ trim)
|
| 45 |
-
session_id: ู
ุนุฑูู ุงูุฌูุณุฉ
|
| 46 |
-
history: ุชุงุฑูุฎ ุงูู
ุญุงุฏุซุฉ ู
ู ConversationMemory
|
| 47 |
-
|
| 48 |
-
ุงูู
ุฎุฑุฌ:
|
| 49 |
-
PipelineResult ุฌุงูุฒ ููุฅุฑุณุงู ููู LLM
|
| 50 |
-
"""
|
| 51 |
t_start = time.time()
|
| 52 |
lang = _detect_language(question)
|
| 53 |
|
|
@@ -65,9 +56,10 @@ async def run(
|
|
| 65 |
)
|
| 66 |
|
| 67 |
# โโ ุงูุฎุทูุฉ 4: ุฅุนุงุฏุฉ ุงูุชุฑุชูุจ ุจุงูู Reranker โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 68 |
-
#
|
| 69 |
chunks = await rerank_chunks(search_query, chunks, top_k=5, lang=lang)
|
| 70 |
-
|
|
|
|
| 71 |
# โโ ุงูุฎุทูุฉ 5: ุจูุงุก ุงูุณูุงู โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 72 |
context = build_context(chunks)
|
| 73 |
sources = extract_sources(chunks)
|
|
|
|
| 5 |
|
| 6 |
from app.core.config import settings
|
| 7 |
from app.core.logging_setup import get_logger
|
| 8 |
+
from app.retrieval import get_retriever
|
| 9 |
+
# from app.retrieval import get_retriever # rerank_chunks DISABLED: using Groq API
|
| 10 |
+
from app.retrieval.reranker import rerank_chunks
|
| 11 |
from app.pipeline.query_handler import is_followup_question, rewrite_query
|
| 12 |
from app.pipeline.context_builder import build_context, extract_sources
|
| 13 |
from app.pipeline.prompt_builder import build_system_prompt
|
|
|
|
| 39 |
session_id: str,
|
| 40 |
history: list[dict],
|
| 41 |
) -> PipelineResult:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
t_start = time.time()
|
| 43 |
lang = _detect_language(question)
|
| 44 |
|
|
|
|
| 56 |
)
|
| 57 |
|
| 58 |
# โโ ุงูุฎุทูุฉ 4: ุฅุนุงุฏุฉ ุงูุชุฑุชูุจ ุจุงูู Reranker โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 59 |
+
# DISABLED: local HuggingFace reranker replaced by Groq API before discussion day.
|
| 60 |
chunks = await rerank_chunks(search_query, chunks, top_k=5, lang=lang)
|
| 61 |
+
# chunks = chunks[:5] # fallback: take top 5 from RRF order
|
| 62 |
+
|
| 63 |
# โโ ุงูุฎุทูุฉ 5: ุจูุงุก ุงูุณูุงู โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 64 |
context = build_context(chunks)
|
| 65 |
sources = extract_sources(chunks)
|
app/pipeline/context_builder.py
CHANGED
|
@@ -2,19 +2,7 @@ from app.core.config import settings
|
|
| 2 |
|
| 3 |
|
| 4 |
def format_chunk(index: int, chunk: dict) -> str:
|
| 5 |
-
|
| 6 |
-
ูุณูู chunk ูุงุญุฏ ู
ุน ุชุฑููุณุฉ ุชุญุชูู ุนูู ู
ุนููู
ุงุช ุงูุณูุงู.
|
| 7 |
-
|
| 8 |
-
ุงูู
ุฏุฎู:
|
| 9 |
-
index: ุฑูู
ุงูู chunk (0-based)
|
| 10 |
-
chunk: dict ูุญุชูู ุนูู "text" ู "metadata"
|
| 11 |
-
|
| 12 |
-
ุงูู
ุฎุฑุฌ:
|
| 13 |
-
ูุต ู
ูุณูู ู
ุน ุชุฑููุณุฉ ุจูู ุฃููุงุณ ู
ุฑุจุนุฉ
|
| 14 |
-
ู
ุซุงู:
|
| 15 |
-
[ู
ูุชุทู 1 โ ุงูุณูุงู: ุจุฑูุงู
ุฌ ุงูุฑูุงุถูุงุช โ ุงูู
ุณุชูู 3 โ ุงููุตู: ุงูุฃูู]
|
| 16 |
-
... ูุต ุงูู chunk ...
|
| 17 |
-
"""
|
| 18 |
meta = chunk.get("metadata", {})
|
| 19 |
article = meta.get("article_number", "")
|
| 20 |
breadcrumb = (
|
|
|
|
| 2 |
|
| 3 |
|
| 4 |
def format_chunk(index: int, chunk: dict) -> str:
|
| 5 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
meta = chunk.get("metadata", {})
|
| 7 |
article = meta.get("article_number", "")
|
| 8 |
breadcrumb = (
|
app/pipeline/prompt_builder.py
CHANGED
|
@@ -7,15 +7,7 @@ _PROMPTS_DIR = Path(__file__).resolve().parent.parent.parent / "prompts"
|
|
| 7 |
|
| 8 |
|
| 9 |
def build_system_prompt(language: str) -> str:
|
| 10 |
-
"""
|
| 11 |
-
ุงุฑุฌุน ุงูู system prompt ุงูู
ูุงุณุจ ุญุณุจ ุงููุบุฉ.
|
| 12 |
|
| 13 |
-
ุงูู
ุนุงู
ูุงุช:
|
| 14 |
-
language: "ar" ููุนุฑุจูุฉุ "en" ููุฅูุฌููุฒูุฉ
|
| 15 |
-
|
| 16 |
-
ุงูุฅุฑุฌุงุน:
|
| 17 |
-
ูุต ุงูู prompt ุงููุงู
ู
|
| 18 |
-
"""
|
| 19 |
filename = "system_ar.txt" if language == "ar" else "system_en.txt"
|
| 20 |
prompt_path = _PROMPTS_DIR / filename
|
| 21 |
|
|
|
|
| 7 |
|
| 8 |
|
| 9 |
def build_system_prompt(language: str) -> str:
|
|
|
|
|
|
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
filename = "system_ar.txt" if language == "ar" else "system_en.txt"
|
| 12 |
prompt_path = _PROMPTS_DIR / filename
|
| 13 |
|
app/pipeline/query_handler.py
CHANGED
|
@@ -1,25 +1,14 @@
|
|
| 1 |
import json
|
| 2 |
-
|
| 3 |
from app.core.config import settings
|
| 4 |
from app.core.logging_setup import get_logger
|
| 5 |
|
| 6 |
logger = get_logger(__name__)
|
| 7 |
|
| 8 |
-
|
| 9 |
|
| 10 |
|
| 11 |
def is_followup_question(question: str) -> bool:
|
| 12 |
-
"""
|
| 13 |
-
ุงูุดู ู
ุง ุฅุฐุง ูุงู ุงูุณุคุงู ูุนุชู
ุฏ ุนูู context ุณุงุจู.
|
| 14 |
-
|
| 15 |
-
ูุนุชู
ุฏ ุนูู 4 ุฅุดุงุฑุงุช:
|
| 16 |
-
- ูุตูุฑ (โค8 ููู
ุงุช)
|
| 17 |
-
- ูุญุชูู ุนูู ููู
ุฉ ุงุณุชููุงู
ุบูุฑ ู
ุญุฏุฏุฉ
|
| 18 |
-
- ูุญุชูู ุนูู ุถู
ูุฑ ุฅุดุงุฑู (ุฏู/ูุฐุง)
|
| 19 |
-
- ูุจุฏุฃ ุจุญุฑู ุนุทู (ู/ู/ููู)
|
| 20 |
-
|
| 21 |
-
ุฅุฐุง ุชูููุฑุช ุฅุดุงุฑุชุงู ุฃู ุฃูุซุฑ โ ุณุคุงู ู
ุชุงุจูุน.
|
| 22 |
-
"""
|
| 23 |
followup_keywords = [
|
| 24 |
"ูู
ุงุฐุง", "ููู", "ู
ุงุฐุง", "ูุถุญ", "ุงุดุฑุญ", "ูุนูู", "ุทูุจ", "ูุฅูู",
|
| 25 |
"why", "how", "what do you mean", "explain", "elaborate",
|
|
@@ -37,11 +26,6 @@ def is_followup_question(question: str) -> bool:
|
|
| 37 |
|
| 38 |
|
| 39 |
async def rewrite_query(question: str, history: list[dict]) -> str:
|
| 40 |
-
"""
|
| 41 |
-
ุฃุนุฏ ุตูุงุบุฉ ุงูุณุคุงู ููููู ู
ุณุชููุงู ุจุงุณุชุฎุฏุงู
Groq.
|
| 42 |
-
|
| 43 |
-
ุฅุฐุง ูุดู ุงูู LLM ูุฃู ุณุจุจ โ ููุนุงุฏ ุงูุณุคุงู ุงูุฃุตูู ุจุฏูู ุชุบููุฑ.
|
| 44 |
-
"""
|
| 45 |
if not history:
|
| 46 |
return question
|
| 47 |
|
|
@@ -55,16 +39,20 @@ async def rewrite_query(question: str, history: list[dict]) -> str:
|
|
| 55 |
)
|
| 56 |
|
| 57 |
try:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
except Exception as exc:
|
| 68 |
logger.warning("ูุดู ุฅุนุงุฏุฉ ุงูุตูุงุบุฉ: %s", exc)
|
| 69 |
|
| 70 |
-
return question # fallback: ุงูุณุคุงู ุงูุฃุตูู
|
|
|
|
| 1 |
import json
|
| 2 |
+
import httpx
|
| 3 |
from app.core.config import settings
|
| 4 |
from app.core.logging_setup import get_logger
|
| 5 |
|
| 6 |
logger = get_logger(__name__)
|
| 7 |
|
| 8 |
+
_OLLAMA_TIMEOUT = httpx.Timeout(settings.ollama_timeout)
|
| 9 |
|
| 10 |
|
| 11 |
def is_followup_question(question: str) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
followup_keywords = [
|
| 13 |
"ูู
ุงุฐุง", "ููู", "ู
ุงุฐุง", "ูุถุญ", "ุงุดุฑุญ", "ูุนูู", "ุทูุจ", "ูุฅูู",
|
| 14 |
"why", "how", "what do you mean", "explain", "elaborate",
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
async def rewrite_query(question: str, history: list[dict]) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
if not history:
|
| 30 |
return question
|
| 31 |
|
|
|
|
| 39 |
)
|
| 40 |
|
| 41 |
try:
|
| 42 |
+
async with httpx.AsyncClient(timeout=_OLLAMA_TIMEOUT) as client:
|
| 43 |
+
response = await client.post(
|
| 44 |
+
settings.ollama_url,
|
| 45 |
+
json={
|
| 46 |
+
"model": settings.ollama_model,
|
| 47 |
+
"messages": [{"role": "user", "content": rewrite_prompt}],
|
| 48 |
+
"stream": False,
|
| 49 |
+
},
|
| 50 |
+
)
|
| 51 |
+
if response.status_code == 200:
|
| 52 |
+
rewritten = response.json()["message"]["content"].strip()
|
| 53 |
+
logger.info("ุชู
ุช ุฅุนุงุฏุฉ ุตูุงุบุฉ ุงูุณุคุงู: %s", rewritten)
|
| 54 |
+
return rewritten
|
| 55 |
except Exception as exc:
|
| 56 |
logger.warning("ูุดู ุฅุนุงุฏุฉ ุงูุตูุงุบุฉ: %s", exc)
|
| 57 |
|
| 58 |
+
return question # fallback: ุงูุณุคุงู ุงูุฃุตูู
|
app/retrieval/__init__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
| 1 |
from app.retrieval.retriever import get_retriever, reset_retriever
|
| 2 |
-
from app.retrieval.reranker import rerank_chunks
|
|
|
|
| 1 |
from app.retrieval.retriever import get_retriever, reset_retriever
|
| 2 |
+
# from app.retrieval.reranker import rerank_chunks # DISABLED: using Groq API reranker
|
app/retrieval/retriever.py
CHANGED
|
@@ -1,20 +1,3 @@
|
|
| 1 |
-
"""
|
| 2 |
-
retriever.py โ Hybrid retriever with weighted RRF fusion (CPU-only production)
|
| 3 |
-
==============================================================================
|
| 4 |
-
- CPU-only embedding (GPU reserved for Ollama LLM)
|
| 5 |
-
- Arabic-aware BM25 tokenizer (diacritics, prefix stripping, alef normalization)
|
| 6 |
-
- BM25 index persisted via joblib โ skips rebuild if collection unchanged
|
| 7 |
-
- top_k hard-capped at 8 (raised from 5) to give LLM enough rows to
|
| 8 |
-
reconstruct multi-row academic tables without OOM risk on CPU
|
| 9 |
-
- fetch_k = top_k ร 4 โ wide candidate pool for fragmented tables
|
| 10 |
-
- Weighted RRF: structural queries (level/dept/course) get 2ร vector weight
|
| 11 |
-
and 0.5ร BM25 weight to suppress noise from ubiquitous terms like "ุณุงุนุฉ"
|
| 12 |
-
- BM25 score threshold: skip BM25 results when max raw score < 0.1
|
| 13 |
-
(query has no meaningful keyword match โ prevents random noise from
|
| 14 |
-
contaminating the fusion ranking)
|
| 15 |
-
- reset_retriever() holds _init_lock to prevent concurrent partial-reset reads
|
| 16 |
-
"""
|
| 17 |
-
|
| 18 |
import re
|
| 19 |
import time
|
| 20 |
import joblib
|
|
@@ -193,16 +176,6 @@ def _is_structural_query(query: str) -> bool:
|
|
| 193 |
|
| 194 |
|
| 195 |
def _build_metadata_filter(query: str) -> dict | None:
|
| 196 |
-
"""Build a ChromaDB `where` filter from the query's level/semester mentions.
|
| 197 |
-
|
| 198 |
-
When a user asks about "ุงูู
ุณุชูู ุงูุฃูู ุงููุตู ุงูุซุงูู", this returns a filter
|
| 199 |
-
that restricts vector search to chunks whose `level_number` = "1" AND
|
| 200 |
-
`semester` = "ุงูุซุงูู". This prevents Level-2/3/4 chunks (which may be
|
| 201 |
-
semantically closer due to course-code overlap) from outranking the
|
| 202 |
-
actually-requested Level-1 chunks.
|
| 203 |
-
|
| 204 |
-
Returns None if no level/semester can be extracted (general query).
|
| 205 |
-
"""
|
| 206 |
level = _extract_level_number(query)
|
| 207 |
semester = _extract_semester(query)
|
| 208 |
|
|
@@ -221,7 +194,11 @@ def _build_metadata_filter(query: str) -> dict | None:
|
|
| 221 |
|
| 222 |
|
| 223 |
def _select_device() -> str:
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
|
| 227 |
# โโ Retriever โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
@@ -229,8 +206,8 @@ def _select_device() -> str:
|
|
| 229 |
class Retriever:
|
| 230 |
def __init__(self):
|
| 231 |
device = _select_device()
|
| 232 |
-
logger.info("[INIT] Embedding device:
|
| 233 |
-
self.embed_model = SentenceTransformer(settings.embed_model,
|
| 234 |
self.client = chromadb.PersistentClient(path=settings.chroma_path)
|
| 235 |
self.collection = self.client.get_or_create_collection(name=settings.chroma_collection)
|
| 236 |
|
|
@@ -255,7 +232,7 @@ class Retriever:
|
|
| 255 |
print(f"[CACHE] BM25 loaded ({len(self.documents)} docs)")
|
| 256 |
return
|
| 257 |
except Exception as e:
|
| 258 |
-
logger.warning("[WARN] BM25 cache invalid:
|
| 259 |
|
| 260 |
print("[BUILD] Building BM25 index...")
|
| 261 |
all_docs = self.collection.get()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import re
|
| 2 |
import time
|
| 3 |
import joblib
|
|
|
|
| 176 |
|
| 177 |
|
| 178 |
def _build_metadata_filter(query: str) -> dict | None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
level = _extract_level_number(query)
|
| 180 |
semester = _extract_semester(query)
|
| 181 |
|
|
|
|
| 194 |
|
| 195 |
|
| 196 |
def _select_device() -> str:
|
| 197 |
+
try:
|
| 198 |
+
import torch
|
| 199 |
+
return "cuda" if torch.cuda.is_available() else "cpu"
|
| 200 |
+
except ImportError:
|
| 201 |
+
return "cpu"
|
| 202 |
|
| 203 |
|
| 204 |
# โโ Retriever โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
|
| 206 |
class Retriever:
|
| 207 |
def __init__(self):
|
| 208 |
device = _select_device()
|
| 209 |
+
logger.info("[INIT] Embedding device: %s", device)
|
| 210 |
+
self.embed_model = SentenceTransformer(settings.embed_model, device=device)
|
| 211 |
self.client = chromadb.PersistentClient(path=settings.chroma_path)
|
| 212 |
self.collection = self.client.get_or_create_collection(name=settings.chroma_collection)
|
| 213 |
|
|
|
|
| 232 |
print(f"[CACHE] BM25 loaded ({len(self.documents)} docs)")
|
| 233 |
return
|
| 234 |
except Exception as e:
|
| 235 |
+
logger.warning("[WARN] BM25 cache invalid: %s", e)
|
| 236 |
|
| 237 |
print("[BUILD] Building BM25 index...")
|
| 238 |
all_docs = self.collection.get()
|
main.py
CHANGED
|
@@ -1,17 +1,6 @@
|
|
| 1 |
-
"""
|
| 2 |
-
main.py โ ููุทุฉ ุงูุฏุฎูู ุงููุญูุฏุฉ ููุชุทุจูู.
|
| 3 |
-
=========================================
|
| 4 |
-
ูุฐุง ุงูู
ูู ู
ุณุคูู ุนู ุดูุก ูุงุญุฏ ููุท:
|
| 5 |
-
ุชุฌู
ูุน ูู ุฃุฌุฒุงุก ุงูุชุทุจูู ูุชุดุบููู.
|
| 6 |
-
|
| 7 |
-
ูุง ูุญุชูู ุนูู ุฃู ู
ูุทู.
|
| 8 |
-
ุฃู ู
ูุทู ูุฌุจ ุฃู ูููู ูู app/
|
| 9 |
-
"""
|
| 10 |
-
|
| 11 |
import os
|
| 12 |
import sys
|
| 13 |
|
| 14 |
-
# ุฅุฌุจุงุฑ UTF-8 ุนูู Windows
|
| 15 |
if sys.stdout.encoding != "utf-8":
|
| 16 |
sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
| 17 |
if sys.stderr.encoding != "utf-8":
|
|
@@ -29,26 +18,21 @@ from app.core.logging_setup import setup_logging, get_logger
|
|
| 29 |
from app.api.routes_chat import router as chat_router
|
| 30 |
from app.api.routes_health import router as health_router
|
| 31 |
|
| 32 |
-
# ุฅุนุฏุงุฏ ุงูู logging ููุฑุงู
|
| 33 |
setup_logging()
|
| 34 |
logger = get_logger("startup")
|
| 35 |
|
| 36 |
|
| 37 |
@asynccontextmanager
|
| 38 |
async def lifespan(app: FastAPI):
|
| 39 |
-
"""
|
| 40 |
-
Startup/Shutdown hooks.
|
| 41 |
-
ููููููุฐ ุนูุฏ ุจุฏุก ุงูุชุดุบููุ ู yield ูุนูู "ุงูุชุทุจูู ูุนู
ู ุงูุขู".
|
| 42 |
-
"""
|
| 43 |
from pathlib import Path
|
| 44 |
-
from app.retrieval import get_retriever
|
|
|
|
| 45 |
from app.llm.groq_client import warmup_model
|
|
|
|
| 46 |
|
| 47 |
-
# ุฃูุดุฆ ุงูู
ุฌูุฏุงุช ุงููุงุฒู
ุฉ
|
| 48 |
Path(settings.data_dir).mkdir(parents=True, exist_ok=True)
|
| 49 |
Path("data/pdfs").mkdir(parents=True, exist_ok=True)
|
| 50 |
|
| 51 |
-
# ุชุญูู ู
ู ุงูู vectorstore โ ุฃุนุฏ ุงูุงุณุชูุนุงุจ ุชููุงุฆูุงู ุฅุฐุง ูุงู ูุงุฑุบุงู
|
| 52 |
guide_path = Path(settings.data_dir) / "guide.md"
|
| 53 |
if guide_path.exists():
|
| 54 |
retriever = get_retriever()
|
|
@@ -56,30 +40,20 @@ async def lifespan(app: FastAPI):
|
|
| 56 |
logger.info("ูุงุนุฏุฉ ุงูุจูุงูุงุช ุงูู
ุชุฌููุฉ ูุงุฑุบุฉ โ ุจุฏุก ุงูุงุณุชูุนุงุจ ุงูุชููุงุฆู...")
|
| 57 |
from app.ingestion import ingest_all_markdown
|
| 58 |
ingest_all_markdown(settings.data_dir)
|
| 59 |
-
# โโ ุฅุนุงุฏุฉ ุจูุงุก ุงูู Retriever ุจุนุฏ ุงูุงุณุชูุนุงุจ โโโโโโโโโโโโโโโโโโโโโ
|
| 60 |
-
# ุงูู singleton ุงููุฏูู
ุฃููุดุฆ ูุงูู
ุฌู
ูุนุฉ ูุงุฑุบุฉุ ูุฐุง documents=[]
|
| 61 |
-
# ู bm25=None. ูุฌุจ ุฅุนุงุฏุฉ ุฅูุดุงุฆู ูููุฑุฃ ุงูุจูุงูุงุช ุงูุฌุฏูุฏุฉ.
|
| 62 |
-
reset_retriever()
|
| 63 |
-
logger.info("ุชู
ุฅุนุงุฏุฉ ุชููุฆุฉ ุงูู Retriever ุจุนุฏ ุงูุงุณุชูุนุงุจ")
|
| 64 |
|
| 65 |
-
# ุชุณุฎูู ูู
ูุฐุฌ ุงูุชุถู
ูู
|
| 66 |
retriever = get_retriever()
|
| 67 |
-
logger.info("ุนุฏุฏ ุงูู chunks ูู ุงูู
ุฌู
ูุนุฉ: %d", retriever.collection.count())
|
| 68 |
retriever.embed_model.encode(["warm up"], normalize_embeddings=True)
|
| 69 |
logger.info("ุชู
ุชุณุฎูู ูู
ูุฐุฌ ุงูุชุถู
ูู")
|
| 70 |
|
| 71 |
-
# ุชุญู
ูู Groq ู
ุณุจูุงู
|
| 72 |
await warmup_model()
|
| 73 |
-
|
| 74 |
-
# ุชุญูู ู
ู ุงูู Reranker
|
| 75 |
await warmup_reranker()
|
| 76 |
|
| 77 |
-
logger.info("
|
|
|
|
| 78 |
yield
|
| 79 |
logger.info("ุงูุชุทุจูู ููุบูู...")
|
| 80 |
|
| 81 |
|
| 82 |
-
# โโ ุฅูุดุงุก ุงูุชุทุจูู โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 83 |
app = FastAPI(
|
| 84 |
title="ASU RAG Chatbot",
|
| 85 |
version="3.0.0",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
|
|
|
|
| 4 |
if sys.stdout.encoding != "utf-8":
|
| 5 |
sys.stdout.reconfigure(encoding="utf-8", errors="replace")
|
| 6 |
if sys.stderr.encoding != "utf-8":
|
|
|
|
| 18 |
from app.api.routes_chat import router as chat_router
|
| 19 |
from app.api.routes_health import router as health_router
|
| 20 |
|
|
|
|
| 21 |
setup_logging()
|
| 22 |
logger = get_logger("startup")
|
| 23 |
|
| 24 |
|
| 25 |
@asynccontextmanager
|
| 26 |
async def lifespan(app: FastAPI):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
from pathlib import Path
|
| 28 |
+
from app.retrieval import get_retriever
|
| 29 |
+
# from app.llm.ollama_client import warmup_model
|
| 30 |
from app.llm.groq_client import warmup_model
|
| 31 |
+
from app.retrieval.reranker import warmup_reranker
|
| 32 |
|
|
|
|
| 33 |
Path(settings.data_dir).mkdir(parents=True, exist_ok=True)
|
| 34 |
Path("data/pdfs").mkdir(parents=True, exist_ok=True)
|
| 35 |
|
|
|
|
| 36 |
guide_path = Path(settings.data_dir) / "guide.md"
|
| 37 |
if guide_path.exists():
|
| 38 |
retriever = get_retriever()
|
|
|
|
| 40 |
logger.info("ูุงุนุฏุฉ ุงูุจูุงูุงุช ุงูู
ุชุฌููุฉ ูุงุฑุบุฉ โ ุจุฏุก ุงูุงุณุชูุนุงุจ ุงูุชููุงุฆู...")
|
| 41 |
from app.ingestion import ingest_all_markdown
|
| 42 |
ingest_all_markdown(settings.data_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
|
|
|
| 44 |
retriever = get_retriever()
|
|
|
|
| 45 |
retriever.embed_model.encode(["warm up"], normalize_embeddings=True)
|
| 46 |
logger.info("ุชู
ุชุณุฎูู ูู
ูุฐุฌ ุงูุชุถู
ูู")
|
| 47 |
|
|
|
|
| 48 |
await warmup_model()
|
|
|
|
|
|
|
| 49 |
await warmup_reranker()
|
| 50 |
|
| 51 |
+
# logger.info("ุงูุชุทุจูู ุฌุงูุฒ โ %s", settings.ollama_model)
|
| 52 |
+
logger.info("ุงูุชุทุจูู ุฌุงูุฒ โ %s", settings.groq_model)
|
| 53 |
yield
|
| 54 |
logger.info("ุงูุชุทุจูู ููุบูู...")
|
| 55 |
|
| 56 |
|
|
|
|
| 57 |
app = FastAPI(
|
| 58 |
title="ASU RAG Chatbot",
|
| 59 |
version="3.0.0",
|
requirements.txt
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
| 1 |
fastapi==0.111.0
|
| 2 |
uvicorn[standard]==0.29.0
|
| 3 |
python-multipart==0.0.9
|
|
@@ -7,12 +10,12 @@ sentence-transformers==3.0.1
|
|
| 7 |
|
| 8 |
# Vector DB (must match vectorstore format)
|
| 9 |
chromadb==0.5.3
|
| 10 |
-
posthog==3.0.2
|
| 11 |
|
| 12 |
-
#
|
| 13 |
groq==0.9.0
|
| 14 |
-
|
| 15 |
-
#
|
|
|
|
| 16 |
httpx==0.27.0
|
| 17 |
|
| 18 |
# NLP utilities
|
|
@@ -23,7 +26,6 @@ rank-bm25==0.2.2
|
|
| 23 |
# Data stack (CRITICAL pins)
|
| 24 |
numpy==1.26.4
|
| 25 |
scikit-learn==1.4.2
|
| 26 |
-
pandas==2.2.2
|
| 27 |
joblib>=1.3.0
|
| 28 |
|
| 29 |
# Optional but stabilizes HF stack
|
|
@@ -35,6 +37,9 @@ python-dotenv==1.0.1
|
|
| 35 |
pydantic==2.7.1
|
| 36 |
pydantic-settings==2.2.1
|
| 37 |
|
| 38 |
-
# Torch
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PyTorch index — CUDA 12.1 (GPU). For CPU: change cu121 to cpu
|
| 2 |
+
--extra-index-url https://download.pytorch.org/whl/cu121
|
| 3 |
+
|
| 4 |
fastapi==0.111.0
|
| 5 |
uvicorn[standard]==0.29.0
|
| 6 |
python-multipart==0.0.9
|
|
|
|
| 10 |
|
| 11 |
# Vector DB (must match vectorstore format)
|
| 12 |
chromadb==0.5.3
|
|
|
|
| 13 |
|
| 14 |
+
# LLM + Reranker HTTP client
|
| 15 |
groq==0.9.0
|
| 16 |
+
# httpx is used for:
|
| 17 |
+
# 1. Streaming Ollama responses
|
| 18 |
+
# 2. Reranker API calls (currently Groq — HuggingFace disabled for now)
|
| 19 |
httpx==0.27.0
|
| 20 |
|
| 21 |
# NLP utilities
|
|
|
|
| 26 |
# Data stack (CRITICAL pins)
|
| 27 |
numpy==1.26.4
|
| 28 |
scikit-learn==1.4.2
|
|
|
|
| 29 |
joblib>=1.3.0
|
| 30 |
|
| 31 |
# Optional but stabilizes HF stack
|
|
|
|
| 37 |
pydantic==2.7.1
|
| 38 |
pydantic-settings==2.2.1
|
| 39 |
|
| 40 |
+
# Torch — CUDA 12.1 build for RTX 3050 GPU acceleration (default)
|
| 41 |
+
# To switch to CPU instead:
|
| 42 |
+
# Step 1: In the --extra-index-url line at the top of this file, change cu121 to cpu (the +cpu wheels are only served from the PyTorch index)
|
| 43 |
+
# Step 2: Replace the line below with: torch==2.3.1+cpu
|
| 44 |
+
# Step 3: Run: pip install torch==2.3.1+cpu --extra-index-url https://download.pytorch.org/whl/cpu
|
| 45 |
+
torch==2.3.1+cu121
|
setup.sh
CHANGED
|
@@ -6,34 +6,41 @@ set -e
|
|
| 6 |
|
| 7 |
echo ""
|
| 8 |
echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
|
| 9 |
-
echo "โ
|
| 10 |
echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
|
| 11 |
echo ""
|
| 12 |
|
| 13 |
# โโ 1. Python virtual environment โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 14 |
if [ ! -d ".venv" ]; then
|
| 15 |
-
echo "
|
| 16 |
python -m venv .venv
|
| 17 |
fi
|
| 18 |
source .venv/bin/activate
|
| 19 |
|
| 20 |
-
echo "
|
| 21 |
pip install --upgrade pip -q
|
| 22 |
pip install -r requirements.txt -q
|
| 23 |
-
echo "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
# โโ 2. Ollama check โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 26 |
echo ""
|
| 27 |
-
echo "
|
| 28 |
if ! command -v ollama &> /dev/null; then
|
| 29 |
-
echo "
|
| 30 |
-
echo "
|
| 31 |
exit 1
|
| 32 |
fi
|
| 33 |
|
| 34 |
# Check if Ollama is running
|
| 35 |
if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
|
| 36 |
-
echo "
|
| 37 |
ollama serve &
|
| 38 |
sleep 3
|
| 39 |
fi
|
|
@@ -42,38 +49,34 @@ echo "โ
Ollama is running"
|
|
| 42 |
|
| 43 |
# โโ 3. Pull LLM model if needed โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 44 |
echo ""
|
| 45 |
-
echo "
|
| 46 |
if ! ollama list | grep -q "gemma3"; then
|
| 47 |
-
echo "
|
| 48 |
ollama pull gemma3
|
| 49 |
else
|
| 50 |
-
echo "
|
| 51 |
fi
|
| 52 |
|
| 53 |
# โโ 4. Prepare Markdown knowledge base โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 54 |
echo ""
|
| 55 |
-
echo "
|
| 56 |
mkdir -p data/markdown
|
| 57 |
|
| 58 |
MD_FILES=$(find data/markdown -name "*.md" 2>/dev/null | wc -l)
|
| 59 |
if [ "$MD_FILES" -gt 0 ]; then
|
| 60 |
-
echo "
|
| 61 |
-
|
| 62 |
-
echo "โ
Knowledge base ready"
|
| 63 |
else
|
| 64 |
-
echo "
|
| 65 |
-
echo "
|
| 66 |
-
echo " โข Run: python ingest_markdown.py"
|
| 67 |
-
echo " โข Or upload via the web UI at http://localhost:8000"
|
| 68 |
fi
|
| 69 |
|
| 70 |
# โโ 5. Start FastAPI โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 71 |
echo ""
|
| 72 |
-
echo "
|
| 73 |
-
echo "
|
| 74 |
-
echo "
|
| 75 |
-
echo "
|
| 76 |
-
echo " Press Ctrl+C to stop"
|
| 77 |
echo ""
|
| 78 |
|
| 79 |
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
|
|
|
| 6 |
|
| 7 |
echo ""
|
| 8 |
echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
|
| 9 |
+
echo "โ ASU RAG Chatbot โ Setup Script โ"
|
| 10 |
echo "โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ"
|
| 11 |
echo ""
|
| 12 |
|
| 13 |
# โโ 1. Python virtual environment โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 14 |
if [ ! -d ".venv" ]; then
|
| 15 |
+
echo "Creating Python virtual environment..."
|
| 16 |
python -m venv .venv
|
| 17 |
fi
|
| 18 |
source .venv/bin/activate
|
| 19 |
|
| 20 |
+
echo "Installing Python dependencies..."
|
| 21 |
pip install --upgrade pip -q
|
| 22 |
pip install -r requirements.txt -q
|
| 23 |
+
echo "Python dependencies installed"
|
| 24 |
+
|
| 25 |
+
# โโ .env setup โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 26 |
+
if [ ! -f ".env" ]; then
|
| 27 |
+
echo "Creating .env from template..."
|
| 28 |
+
cp .env.example .env
|
| 29 |
+
echo ".env created — edit it if needed"
|
| 30 |
+
fi
|
| 31 |
|
| 32 |
# โโ 2. Ollama check โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 33 |
echo ""
|
| 34 |
+
echo "Checking Ollama..."
|
| 35 |
if ! command -v ollama &> /dev/null; then
|
| 36 |
+
echo "Ollama not found. Install it from: https://ollama.com/download"
|
| 37 |
+
echo "Then run: ollama pull gemma3"
|
| 38 |
exit 1
|
| 39 |
fi
|
| 40 |
|
| 41 |
# Check if Ollama is running
|
| 42 |
if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
|
| 43 |
+
echo "Starting Ollama server in background..."
|
| 44 |
ollama serve &
|
| 45 |
sleep 3
|
| 46 |
fi
|
|
|
|
| 49 |
|
| 50 |
# โโ 3. Pull LLM model if needed โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 51 |
echo ""
|
| 52 |
+
echo "Checking for gemma3 model..."
|
| 53 |
if ! ollama list | grep -q "gemma3"; then
|
| 54 |
+
echo "Pulling gemma3 (this downloads ~4 GB once)..."
|
| 55 |
ollama pull gemma3
|
| 56 |
else
|
| 57 |
+
echo "gemma3 already available"
|
| 58 |
fi
|
| 59 |
|
| 60 |
# โโ 4. Prepare Markdown knowledge base โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 61 |
echo ""
|
| 62 |
+
echo "Preparing data/markdown/ directory..."
|
| 63 |
mkdir -p data/markdown
|
| 64 |
|
| 65 |
MD_FILES=$(find data/markdown -name "*.md" 2>/dev/null | wc -l)
|
| 66 |
if [ "$MD_FILES" -gt 0 ]; then
|
| 67 |
+
echo "Found $MD_FILES Markdown file(s). Ingestion will run automatically on server startup."
|
| 68 |
+
|
|
|
|
| 69 |
else
|
| 70 |
+
echo "No Markdown files found in data/markdown/"
|
| 71 |
+
echo "• Place your .md files then restart the server — ingestion runs automatically"
|
|
|
|
|
|
|
| 72 |
fi
|
| 73 |
|
| 74 |
# โโ 5. Start FastAPI โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 75 |
echo ""
|
| 76 |
+
echo "Starting FastAPI server..."
|
| 77 |
+
echo "API docs: http://localhost:8000/docs"
|
| 78 |
+
echo "Health: http://localhost:8000/health"
|
| 79 |
+
echo "Press Ctrl+C to stop"
|
|
|
|
| 80 |
echo ""
|
| 81 |
|
| 82 |
uvicorn main:app --host 0.0.0.0 --port 8000 --reload
|
test_reranker.py
DELETED
|
@@ -1,28 +0,0 @@
|
|
| 1 |
-
import asyncio
|
| 2 |
-
import logging
|
| 3 |
-
from app.retrieval.reranker import rerank_chunks, warmup_reranker
|
| 4 |
-
|
| 5 |
-
logging.basicConfig(level=logging.INFO)
|
| 6 |
-
|
| 7 |
-
async def test():
|
| 8 |
-
print("Testing warmup...")
|
| 9 |
-
ok = await warmup_reranker()
|
| 10 |
-
print("Warmup OK:", ok)
|
| 11 |
-
|
| 12 |
-
chunks = [
|
| 13 |
-
{'text': 'ุชุชููู ูููุฉ ุงูุนููู
ู
ู ุงูุณุงู
ุงูุฑูุงุถูุงุช ูุงูููุฒูุงุก ูุงูููู
ูุงุก', 'source': 'guide.md', 'rrf_score': 0.5, 'metadata': {}},
|
| 14 |
-
{'text': 'ูุฌุจ ุนูู ุงูุทุงูุจ ุงุฌุชูุงุฒ 140 ุณุงุนุฉ ู
ุนุชู
ุฏุฉ', 'source': 'guide.md', 'rrf_score': 0.4, 'metadata': {}},
|
| 15 |
-
{'text': 'ุงูุทูุณ ุฌู
ูู ุงูููู
', 'source': 'x.md', 'rrf_score': 0.3, 'metadata': {}},
|
| 16 |
-
{'text': 'ูุณู
ุงูุฑูุงุถูุงุช ูุถู
ุชุฎุตุตุงุช ุนุฏูุฏุฉ', 'source': 'guide.md', 'rrf_score': 0.2, 'metadata': {}},
|
| 17 |
-
{'text': 'ู
ูุงุนูุฏ ุงูุชุณุฌูู ูู ุงููุตู ุงูุงูู', 'source': 'guide.md', 'rrf_score': 0.1, 'metadata': {}},
|
| 18 |
-
{'text': 'ูููุฉ ุงูุนููู
ุฌุงู
ุนุฉ ุนูู ุดู
ุณ ุชุฃุณุณุช ุนุงู
1950', 'source': 'guide.md', 'rrf_score': 0.05, 'metadata': {}}
|
| 19 |
-
]
|
| 20 |
-
|
| 21 |
-
print("\nTesting reranking...")
|
| 22 |
-
result = await rerank_chunks('ู
ุง ูู ุงูุณุงู
ูููุฉ ุงูุนููู
ุ', chunks, top_k=3)
|
| 23 |
-
print("\nResults:")
|
| 24 |
-
for r in result:
|
| 25 |
-
print(f"{r['rerank_score']:.4f} | {r['text'][:60]}")
|
| 26 |
-
|
| 27 |
-
if __name__ == "__main__":
|
| 28 |
-
asyncio.run(test())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|