quanho114 committed
Commit ebb8326 · 1 Parent(s): 0d3f194

Deploy VietQA API

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. Dockerfile +18 -0
  2. api.py +164 -0
  3. requirements-prod.txt +28 -0
  4. src/__init__.py +2 -0
  5. src/__pycache__/__init__.cpython-312.pyc +0 -0
  6. src/__pycache__/__init__.cpython-314.pyc +0 -0
  7. src/__pycache__/config.cpython-312.pyc +0 -0
  8. src/__pycache__/config.cpython-314.pyc +0 -0
  9. src/__pycache__/graph.cpython-312.pyc +0 -0
  10. src/__pycache__/pipeline.cpython-312.pyc +0 -0
  11. src/__pycache__/state.cpython-312.pyc +0 -0
  12. src/config.py +110 -0
  13. src/data_processing/__init__.py +26 -0
  14. src/data_processing/__pycache__/__init__.cpython-312.pyc +0 -0
  15. src/data_processing/__pycache__/__init__.cpython-314.pyc +0 -0
  16. src/data_processing/__pycache__/answer.cpython-312.pyc +0 -0
  17. src/data_processing/__pycache__/answer.cpython-314.pyc +0 -0
  18. src/data_processing/__pycache__/formatting.cpython-312.pyc +0 -0
  19. src/data_processing/__pycache__/formatting.cpython-314.pyc +0 -0
  20. src/data_processing/__pycache__/loaders.cpython-312.pyc +0 -0
  21. src/data_processing/__pycache__/loaders.cpython-314.pyc +0 -0
  22. src/data_processing/__pycache__/models.cpython-312.pyc +0 -0
  23. src/data_processing/__pycache__/models.cpython-314.pyc +0 -0
  24. src/data_processing/answer.py +151 -0
  25. src/data_processing/formatting.py +37 -0
  26. src/data_processing/loaders.py +151 -0
  27. src/data_processing/models.py +29 -0
  28. src/graph.py +47 -0
  29. src/nodes/__init__.py +15 -0
  30. src/nodes/__pycache__/__init__.cpython-312.pyc +0 -0
  31. src/nodes/__pycache__/direct.cpython-312.pyc +0 -0
  32. src/nodes/__pycache__/logic.cpython-312.pyc +0 -0
  33. src/nodes/__pycache__/rag.cpython-312.pyc +0 -0
  34. src/nodes/__pycache__/router.cpython-312.pyc +0 -0
  35. src/nodes/direct.py +42 -0
  36. src/nodes/logic.py +253 -0
  37. src/nodes/rag.py +141 -0
  38. src/nodes/router.py +112 -0
  39. src/pipeline.py +215 -0
  40. src/state.py +16 -0
  41. src/templates/direct_answer.j2 +19 -0
  42. src/templates/logic_solver.j2 +37 -0
  43. src/templates/rag.j2 +25 -0
  44. src/templates/router.j2 +43 -0
  45. src/utils/__init__.py +47 -0
  46. src/utils/__pycache__/__init__.cpython-312.pyc +0 -0
  47. src/utils/__pycache__/__init__.cpython-314.pyc +0 -0
  48. src/utils/__pycache__/checkpointing.cpython-312.pyc +0 -0
  49. src/utils/__pycache__/checkpointing.cpython-314.pyc +0 -0
  50. src/utils/__pycache__/common.cpython-312.pyc +0 -0
Dockerfile ADDED
@@ -0,0 +1,18 @@
+# Dockerfile for Hugging Face Spaces
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install dependencies
+COPY requirements-prod.txt .
+RUN pip install --no-cache-dir -r requirements-prod.txt
+
+# Copy application code
+COPY api.py .
+COPY src/ ./src/
+
+# Expose port (HF Spaces uses port 7860)
+EXPOSE 7860
+
+# Run the API
+CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "7860"]
api.py ADDED
@@ -0,0 +1,164 @@
+"""FastAPI Backend for VietQA Multi-Agent System."""
+
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+
+from src.data_processing.models import QuestionInput
+from src.data_processing.formatting import question_to_state
+from src.data_processing.answer import normalize_answer
+from src.graph import get_graph
+from src.utils.llm import set_large_model_override, get_available_large_models
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Fast startup - lazy load models on first request."""
+    print("[Startup] Server starting (models will load on first request)...")
+    print("[Startup] Server ready!")
+    yield
+
+
+app = FastAPI(
+    title="VietQA Multi-Agent API",
+    description="API cho hệ thống trả lời câu hỏi trắc nghiệm tiếng Việt",
+    version="1.0.0",
+    lifespan=lifespan
+)
+
+# CORS for frontend
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+class SolveRequest(BaseModel):
+    question: str
+    choices: list[str]
+    model: str | None = None
+
+
+class SolveResponse(BaseModel):
+    answer: str
+    route: str
+    reasoning: str
+    context: str
+
+
+def clean_thinking_tags(text: str) -> str:
+    """Remove <think>...</think> tags from model response."""
+    import re
+    # Remove think tags and their content
+    cleaned = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
+    return cleaned.strip()
+
+
+class ChatRequest(BaseModel):
+    message: str
+    model: str | None = None
+
+
+class ChatResponse(BaseModel):
+    response: str
+    route: str
+
+
+class ModelsResponse(BaseModel):
+    models: list[dict]
+
+
+@app.get("/")
+async def root():
+    return {"message": "VietQA Multi-Agent API", "status": "running"}
+
+
+@app.get("/health")
+async def health():
+    """Health check endpoint for Render."""
+    return {"status": "ok"}
+
+
+@app.get("/api/models", response_model=ModelsResponse)
+async def get_models():
+    """Get available large models."""
+    models = get_available_large_models()
+    return {
+        "models": [
+            {"id": m, "name": m.split("/")[-1]}
+            for m in models
+        ]
+    }
+
+
+@app.post("/api/solve", response_model=SolveResponse)
+async def solve_question(req: SolveRequest):
+    """Solve a multiple-choice question."""
+    if not req.question.strip():
+        raise HTTPException(400, "Question is required")
+    if len(req.choices) < 2:
+        raise HTTPException(400, "At least 2 choices required")
+
+    set_large_model_override(req.model)
+
+    try:
+        q = QuestionInput(qid="api", question=req.question, choices=req.choices)
+        state = question_to_state(q)
+        graph = get_graph()
+
+        result = await graph.ainvoke(state)
+
+        answer = normalize_answer(
+            answer=result.get("answer", "A"),
+            num_choices=len(req.choices),
+            question_id="api",
+            default="A"
+        )
+
+        return SolveResponse(
+            answer=answer,
+            route=result.get("route", "unknown"),
+            reasoning=clean_thinking_tags(result.get("raw_response", "")),
+            context=result.get("context", "")
+        )
+    except Exception as e:
+        import traceback
+        traceback.print_exc()
+        raise HTTPException(500, str(e))
+    finally:
+        set_large_model_override(None)
+
+
+@app.post("/api/chat", response_model=ChatResponse)
+async def chat(req: ChatRequest):
+    """Free-form chat (routes through pipeline without choices)."""
+    if not req.message.strip():
+        raise HTTPException(400, "Message is required")
+
+    set_large_model_override(req.model)
+
+    try:
+        # Use empty choices for chat mode
+        q = QuestionInput(qid="chat", question=req.message, choices=[])
+        state = question_to_state(q)
+        graph = get_graph()
+
+        result = await graph.ainvoke(state)
+
+        return ChatResponse(
+            response=clean_thinking_tags(result.get("raw_response", "")),
+            route=result.get("route", "unknown")
+        )
+    except Exception as e:
+        raise HTTPException(500, str(e))
+    finally:
+        set_large_model_override(None)
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
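For reference, a minimal client sketch for the /api/solve endpoint (not part of this commit; the base URL is a placeholder, and httpx is already pinned in requirements-prod.txt):

import httpx

BASE_URL = "http://localhost:7860"  # placeholder - substitute the deployed Space URL

payload = {
    "question": "Thủ đô của Việt Nam là thành phố nào?",
    "choices": ["Hà Nội", "Đà Nẵng", "Huế", "Cần Thơ"],
    "model": None,  # None selects the default large model
}

resp = httpx.post(f"{BASE_URL}/api/solve", json=payload, timeout=120.0)
resp.raise_for_status()
data = resp.json()
print(data["answer"], data["route"])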
requirements-prod.txt ADDED
@@ -0,0 +1,28 @@
+# Production dependencies - minimal for API only
+fastapi==0.115.0
+uvicorn==0.30.6
+pydantic==2.12.5
+pydantic-settings==2.12.0
+python-dotenv==1.2.1
+
+# LangChain
+langchain==1.1.0
+langchain-core==1.1.0
+langchain-community==0.4.1
+langchain-text-splitters==1.0.0
+langgraph==1.0.4
+
+# LangChain integrations
+langchain-openai
+
+# HTTP client
+httpx==0.28.1
+requests==2.32.5
+
+# Jinja2 for templates
+jinja2==3.1.6
+
+# Other essentials
+tenacity==9.1.2
+pyyaml==6.0.3
+jsonpatch==1.33
src/__init__.py ADDED
@@ -0,0 +1,2 @@
+"""VNPT AI RAG Pipeline for Vietnamese multiple-choice questions."""
+
src/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (225 Bytes).

src/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (227 Bytes).

src/__pycache__/config.cpython-312.pyc ADDED
Binary file (3.91 kB).

src/__pycache__/config.cpython-314.pyc ADDED
Binary file (3.88 kB).

src/__pycache__/graph.cpython-312.pyc ADDED
Binary file (1.82 kB).

src/__pycache__/pipeline.cpython-312.pyc ADDED
Binary file (10.9 kB).

src/__pycache__/state.cpython-312.pyc ADDED
Binary file (726 Bytes).
 
src/config.py ADDED
@@ -0,0 +1,110 @@
+import os
+from pathlib import Path
+
+from dotenv import load_dotenv
+from pydantic import Field
+from pydantic_settings import BaseSettings
+
+load_dotenv()
+
+PROJECT_ROOT = Path(__file__).resolve().parent.parent
+
+DATA_DIR = Path(os.getenv("DATA_DIR", PROJECT_ROOT / "data"))
+DATA_INPUT_DIR = Path(os.getenv("DATA_INPUT_DIR", PROJECT_ROOT / "test_data"))
+DATA_OUTPUT_DIR = Path(os.getenv("DATA_OUTPUT_DIR", PROJECT_ROOT / "output"))
+DATA_CRAWLED_DIR = Path(os.getenv("DATA_CRAWLED_DIR", DATA_DIR / "crawl"))
+BATCH_SIZE = 1
+
+
+class Settings(BaseSettings):
+    """Application settings with environment variable support."""
+
+    # MegaLLM API settings (for small model)
+    megallm_api_key: str = Field(
+        default="",
+        alias="MEGALLM_API_KEY",
+        description="API key for MegaLLM",
+    )
+    megallm_base_url: str = Field(
+        default="https://ai.megallm.io/v1",
+        alias="MEGALLM_BASE_URL",
+    )
+
+    # Groq API settings (for large model)
+    groq_api_key: str = Field(
+        default="",
+        alias="GROQ_API_KEY",
+        description="API key for Groq",
+    )
+    groq_base_url: str = Field(
+        default="https://api.groq.com/openai/v1",
+        alias="GROQ_BASE_URL",
+    )
+
+    # OpenRouter API (fallback)
+    openrouter_api_key: str = Field(
+        default="",
+        alias="OPENROUTER_API_KEY",
+        description="API key for OpenRouter (fallback)",
+    )
+
+    # Model names
+    model_small: str = Field(
+        default="qwen/qwen3-32b",
+        alias="MODEL_SMALL",
+        description="Small model for routing, reranking, and RAG",
+    )
+    model_large: str = Field(
+        default="meta-llama/llama-4-scout-17b-16e-instruct",
+        alias="MODEL_LARGE",
+        description="Large model for logic/direct answering",
+    )
+
+    # Available large models for testing
+    available_large_models: list[str] = [
+        "llama-3.3-70b-versatile",
+        "meta-llama/llama-4-scout-17b-16e-instruct",
+        "moonshotai/kimi-k2-instruct-0905",
+        "openai/gpt-oss-120b"
+    ]
+
+    # Local embedding model (Vietnamese)
+    embedding_model: str = Field(
+        default="bkai-foundation-models/vietnamese-bi-encoder",
+        alias="EMBEDDING_MODEL",
+    )
+
+    # Vector database
+    qdrant_collection: str = Field(
+        default="vnpt_knowledge_base",
+        alias="QDRANT_COLLECTION",
+    )
+    vector_db_path: str = Field(
+        default="",
+        alias="VECTOR_DB_PATH",
+        description="Path to Qdrant storage. Defaults to DATA_DIR/qdrant_storage if empty.",
+    )
+
+    chunk_size: int = 1000
+    chunk_overlap: int = 200
+    top_k_retrieval: int = 10
+    top_k_rerank: int = 3
+
+    @property
+    def vector_db_path_resolved(self) -> Path:
+        """Resolve vector database path, defaulting to DATA_DIR/qdrant_storage."""
+        if self.vector_db_path:
+            return Path(self.vector_db_path)
+        return DATA_DIR / "qdrant_storage"
+
+    class Config:
+        env_file = ".env"
+        extra = "ignore"
+
+
+settings = Settings()
+
+# Validate API key on import
+if not settings.megallm_api_key:
+    import warnings
+    warnings.warn("MEGALLM_API_KEY not set. LLM calls will fail.")
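Because each field carries an environment alias, settings can be overridden without code changes. A small sketch (the override value is one of the entries already listed in available_large_models):

import os

# Hypothetical override - must be set before Settings() is instantiated
os.environ["MODEL_LARGE"] = "llama-3.3-70b-versatile"

from src.config import Settings

print(Settings().model_large)  # picks up the environment override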
src/data_processing/__init__.py ADDED
@@ -0,0 +1,26 @@
+"""Data processing utilities for the RAG pipeline."""
+
+from src.data_processing.answer import (
+    extract_answer,
+    extract_and_normalize,
+    normalize_answer,
+    validate_answer,
+)
+from src.data_processing.formatting import format_choices, format_choices_display, question_to_state
+from src.data_processing.loaders import load_test_data_from_csv, load_test_data_from_json
+from src.data_processing.models import InferenceLogEntry, PredictionOutput, QuestionInput
+
+__all__ = [
+    "QuestionInput",
+    "PredictionOutput",
+    "InferenceLogEntry",
+    "load_test_data_from_json",
+    "load_test_data_from_csv",
+    "question_to_state",
+    "format_choices",
+    "format_choices_display",
+    "extract_answer",
+    "validate_answer",
+    "normalize_answer",
+    "extract_and_normalize",
+]
src/data_processing/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (846 Bytes).

src/data_processing/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (843 Bytes).

src/data_processing/__pycache__/answer.cpython-312.pyc ADDED
Binary file (5.15 kB).

src/data_processing/__pycache__/answer.cpython-314.pyc ADDED
Binary file (5.61 kB).

src/data_processing/__pycache__/formatting.cpython-312.pyc ADDED
Binary file (2.2 kB).

src/data_processing/__pycache__/formatting.cpython-314.pyc ADDED
Binary file (2.72 kB).

src/data_processing/__pycache__/loaders.cpython-312.pyc ADDED
Binary file (6.14 kB).

src/data_processing/__pycache__/loaders.cpython-314.pyc ADDED
Binary file (7.11 kB).

src/data_processing/__pycache__/models.cpython-312.pyc ADDED
Binary file (2.11 kB).

src/data_processing/__pycache__/models.cpython-314.pyc ADDED
Binary file (2.94 kB).
 
src/data_processing/answer.py ADDED
@@ -0,0 +1,151 @@
+"""Answer extraction and validation utilities.
+
+Consolidates answer-related logic:
+- Extraction from LLM responses (CoT format)
+- Validation against valid choices
+- Normalization with fallback defaults
+"""
+
+import re
+import string
+
+from src.utils.logging import print_log
+
+
+def extract_answer(response: str, num_choices: int = 4, require_end: bool = False) -> str | None:
+    """Extract answer letter from LLM response using strict explicit answer lines.
+
+    Only accepts answers from explicit final-answer lines with colon:
+    - "Đáp án: A", "Answer: B" (preferred)
+    - "Lựa chọn: C" (secondary)
+
+    Returns the LAST valid explicit answer line found (later lines override earlier).
+
+    Args:
+        response: Response text from LLM
+        num_choices: Number of valid choices
+        require_end: If True, only extract answer from last 20% of response
+
+    Returns:
+        Answer letter (A, B, C, D) or None if no explicit answer found
+    """
+    if not response:
+        return None
+
+    valid_labels = string.ascii_uppercase[:num_choices]
+
+    # If require_end, only look at last 20% of response
+    search_text = response
+    if require_end and len(response) > 100:
+        cutoff = int(len(response) * 0.8)
+        search_text = response[cutoff:]
+
+    # Pattern for primary labels: "Đáp án:" or "Answer:" (highest priority)
+    primary_pattern = r"(?:Đáp\s*án|Answer)[ \t]*[:：][ \t]*\**([A-Z])\b"
+
+    # Pattern for secondary label: "Lựa chọn:" (lower priority)
+    secondary_pattern = r"Lựa\s*chọn[ \t]*[:：][ \t]*\**([A-Z])\b"
+
+    # Find all matches for both patterns
+    primary_matches = re.findall(primary_pattern, search_text, flags=re.IGNORECASE)
+    secondary_matches = re.findall(secondary_pattern, search_text, flags=re.IGNORECASE)
+
+    if primary_matches:
+        answer = primary_matches[-1].upper()
+        if answer in valid_labels:
+            return answer
+
+    if secondary_matches:
+        answer = secondary_matches[-1].upper()
+        if answer in valid_labels:
+            return answer
+
+    # Single letter response (entire response is just a letter)
+    clean_response = search_text.strip()
+    if len(clean_response) == 1 and clean_response.upper() in valid_labels:
+        return clean_response.upper()
+
+    return None
+
+
+def validate_answer(answer: str, num_choices: int) -> tuple[bool, str]:
+    """Validate if answer is within valid range and normalize it.
+
+    Args:
+        answer: Raw answer string from model
+        num_choices: Number of choices available (A, B, C, D, ...)
+
+    Returns:
+        Tuple of (is_valid, normalized_answer)
+    """
+    valid_answers = string.ascii_uppercase[:num_choices]
+    if answer and answer.upper() in valid_answers:
+        return True, answer.upper()
+
+    return False, answer or ""
+
+
+def normalize_answer(
+    answer: str | None,
+    num_choices: int,
+    question_id: str | None = None,
+    default: str = "A",
+) -> str:
+    """Normalize and validate answer with fallback to default.
+
+    Combines extraction, validation, and normalization:
+    - Validates answer is within valid range (A, B, C, D, ...)
+    - Normalizes refusal responses
+    - Falls back to default for invalid answers
+
+    Args:
+        answer: Raw answer string from model (can be None)
+        num_choices: Number of choices available
+        question_id: Optional question ID for logging warnings
+        default: Default answer if validation fails
+
+    Returns:
+        Normalized answer string
+    """
+    if answer is None:
+        if question_id:
+            print_log(
+                f" [Warning] No answer extracted for {question_id}, "
+                f"defaulting to {default}"
+            )
+        return default
+
+    is_valid, normalized = validate_answer(answer, num_choices)
+
+    if not is_valid:
+        if question_id:
+            print_log(
+                f" [Warning] Invalid answer '{answer}' for {question_id}, "
+                f"defaulting to {default}"
+            )
+        return default
+
+    return normalized
+
+
+def extract_and_normalize(
+    response: str,
+    num_choices: int,
+    question_id: str | None = None,
+    default: str = "A",
+) -> str:
+    """Extract answer from response and normalize it (convenience function).
+
+    Combines extract_answer() and normalize_answer() into a single call.
+
+    Args:
+        response: Raw LLM response text
+        num_choices: Number of valid choices
+        question_id: Optional question ID for logging
+        default: Default answer if extraction/validation fails
+
+    Returns:
+        Normalized answer string
+    """
+    extracted = extract_answer(response, num_choices=num_choices)
+    return normalize_answer(extracted, num_choices, question_id, default)
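To illustrate the extraction rules above (the sample responses are made up; behavior follows the docstrings):

from src.data_processing.answer import extract_answer, extract_and_normalize

response = "Phân tích: B sai vì ...\nLựa chọn: D\nĐáp án: C"
print(extract_answer(response, num_choices=4))  # "C" - the primary "Đáp án:" label wins

# No explicit answer line anywhere -> falls back to the default with a warning
print(extract_and_normalize("Tôi không chắc.", num_choices=4, question_id="q1"))  # "A"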
src/data_processing/formatting.py ADDED
@@ -0,0 +1,37 @@
+import string
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from src.data_processing.models import QuestionInput
+    from src.state import GraphState
+
+
+def question_to_state(q: "QuestionInput") -> "GraphState":
+    """Convert QuestionInput to GraphState for pipeline processing."""
+    state: "GraphState" = {
+        "question_id": q.qid,
+        "question": q.question,
+        "all_choices": q.choices,
+    }
+    return state
+
+
+def format_choices(choices: list[str]) -> str:
+    """Format choices for LLM prompts (A. ..., B. ..., etc.)."""
+    return "\n".join(f"{label}. {text}" for label, text in zip(string.ascii_uppercase, choices))
+
+
+def format_choices_display(choices: list[str]) -> str:
+    """Format choices for console display (2 columns)."""
+    labels = string.ascii_uppercase
+    lines = []
+    for i in range(0, len(choices), 2):
+        parts = []
+        for j in range(2):
+            idx = i + j
+            if idx < len(choices):
+                label = labels[idx] if idx < len(labels) else str(idx)
+                parts.append(f"{label}. {choices[idx]:<30}")
+        if parts:
+            lines.append(" " + " ".join(parts))
+    return "\n".join(lines)
src/data_processing/loaders.py ADDED
@@ -0,0 +1,151 @@
+"""Data loading utilities for test questions."""
+
+import csv
+import json
+from pathlib import Path
+
+from src.data_processing.models import QuestionInput
+
+# Standard column mappings for choice columns
+_CHOICE_COLUMN_MAPPINGS = {
+    "choice_a": 0, "choice_b": 1, "choice_c": 2, "choice_d": 3,
+    "option_a": 0, "option_b": 1, "option_c": 2, "option_d": 3,
+    "a": 0, "b": 1, "c": 2, "d": 3,
+}
+
+
+def load_test_data_from_json(file_path: Path) -> list[QuestionInput]:
+    """Load test questions from JSON file.
+
+    Expected format: List of dicts with qid, question, choices, answer (optional)
+
+    Args:
+        file_path: Path to JSON file
+
+    Returns:
+        List of QuestionInput objects
+
+    Raises:
+        FileNotFoundError: If file doesn't exist
+        ValueError: If file format is invalid
+    """
+    if not file_path.exists():
+        raise FileNotFoundError(f"Test data file not found: {file_path}")
+
+    if file_path.suffix.lower() != ".json":
+        raise ValueError(f"Only JSON files are supported: {file_path}")
+
+    with open(file_path, encoding="utf-8") as f:
+        data = json.load(f)
+
+    if not isinstance(data, list):
+        raise ValueError(f"JSON file must contain a list of questions: {file_path}")
+
+    questions = []
+    for item in data:
+        if "choices" not in item or not isinstance(item["choices"], list):
+            raise ValueError(f"Question {item.get('qid', 'unknown')} must have 'choices' as a list")
+
+        questions.append(QuestionInput(
+            qid=item["qid"],
+            question=item["question"],
+            choices=item["choices"],
+            answer=item.get("answer"),
+        ))
+
+    return questions
+
+
+def _normalize_row_keys(row: dict[str, str]) -> dict[str, str]:
+    """Normalize row keys to lowercase and strip whitespace."""
+    return {k.lower().strip(): v for k, v in row.items()}
+
+
+def _extract_choices_from_row(row: dict[str, str]) -> list[str]:
+    """Extract choices from a normalized CSV row.
+
+    Tries multiple strategies:
+    1. Individual choice columns (choice_a/option_a/a, etc.)
+    2. JSON array in 'choices' column
+    3. Comma/semicolon separated string in 'choices' column
+
+    Args:
+        row: Normalized row dict with lowercase keys
+
+    Returns:
+        List of choice strings (may contain empty strings)
+    """
+    # Strategy 1: Individual columns (choice_a, option_a, a, etc.)
+    choices = ["", "", "", ""]
+    found_individual = False
+
+    for col_name, idx in _CHOICE_COLUMN_MAPPINGS.items():
+        if col_name in row and row[col_name]:
+            choices[idx] = row[col_name].strip()
+            found_individual = True
+
+    if found_individual:
+        return [c for c in choices if c]
+
+    # Strategy 2 & 3: Parse 'choices' column
+    choices_raw = row.get("choices", "")
+    if not choices_raw:
+        return []
+
+    # Try JSON parse first
+    try:
+        parsed = json.loads(choices_raw)
+        if isinstance(parsed, list):
+            return [str(c).strip() for c in parsed if str(c).strip()]
+    except (json.JSONDecodeError, TypeError):
+        pass
+
+    # Fallback: split by comma or semicolon
+    return [c.strip() for c in choices_raw.replace(";", ",").split(",") if c.strip()]
+
+
+def load_test_data_from_csv(file_path: Path) -> list[QuestionInput]:
+    """Load test questions from CSV file.
+
+    Supports multiple CSV formats:
+    - Columns: qid, question, choice_a, choice_b, choice_c, choice_d
+    - Columns: qid, question, option_a, option_b, option_c, option_d
+    - Columns: qid, question, A, B, C, D
+    - Columns: qid, question, choices (JSON array or comma-separated)
+
+    Args:
+        file_path: Path to CSV file
+
+    Returns:
+        List of QuestionInput objects
+
+    Raises:
+        FileNotFoundError: If file doesn't exist
+    """
+    if not file_path.exists():
+        raise FileNotFoundError(f"Test data file not found: {file_path}")
+
+    questions = []
+    with open(file_path, encoding="utf-8") as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            norm_row = _normalize_row_keys(row)
+
+            qid = norm_row.get("qid", "").strip()
+            question = norm_row.get("question", "").strip()
+
+            if not qid or not question:
+                continue
+
+            choices = _extract_choices_from_row(norm_row)
+            if not choices:
+                choices = ["", "", "", ""]
+
+            questions.append(QuestionInput(
+                qid=qid,
+                question=question,
+                choices=choices,
+                answer=norm_row.get("answer", "").strip() or None,
+            ))
+
+    return questions
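A quick sketch of the CSV strategies (the file path is hypothetical): a row with individual choice_a..choice_d columns hits strategy 1, while a choices column holding a JSON array hits strategy 2:

from pathlib import Path
from src.data_processing.loaders import load_test_data_from_csv

# e.g. a CSV with header: qid,question,choices
#   q1,"2 + 2 = ?","[""3"", ""4"", ""5"", ""6""]"
questions = load_test_data_from_csv(Path("test_data/questions.csv"))  # hypothetical path
for q in questions:
    print(q.qid, q.choices)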
src/data_processing/models.py ADDED
@@ -0,0 +1,29 @@
+from pydantic import BaseModel, Field
+
+
+class QuestionInput(BaseModel):
+    """Input schema for a multiple-choice question."""
+
+    qid: str = Field(description="Question identifier")
+    question: str = Field(description="Question text in Vietnamese")
+    choices: list[str] = Field(description="List of answer choices")
+    answer: str | None = Field(default=None, description="Correct answer (A, B, C, ...)")
+
+
+class PredictionOutput(BaseModel):
+    """Output schema for a prediction."""
+
+    qid: str = Field(description="Question identifier")
+    answer: str = Field(description="Predicted answer: A, B, C, D, ...")
+
+
+class InferenceLogEntry(BaseModel):
+    """Schema for JSONL inference log entry (used for checkpointing)."""
+
+    qid: str = Field(description="Question identifier")
+    question: str = Field(description="Original question text")
+    choices: list[str] = Field(description="List of answer choices")
+    final_answer: str = Field(description="Final predicted answer")
+    raw_response: str = Field(default="", description="Raw LLM response")
+    route: str = Field(default="unknown", description="Pipeline route taken")
+    retrieved_context: str = Field(default="", description="Retrieved context from RAG")
src/graph.py ADDED
@@ -0,0 +1,47 @@
+"""LangGraph definition for the RAG pipeline."""
+
+from langgraph.graph import END, StateGraph
+
+from src.state import GraphState
+from src.nodes.logic import logic_solver_node
+from src.nodes.rag import knowledge_rag_node
+from src.nodes.router import route_question, router_node
+from src.nodes.direct import direct_answer_node
+
+
+def build_graph():
+    """Build and compile the LangGraph pipeline."""
+
+    workflow = StateGraph(GraphState)
+
+    workflow.add_node("router", router_node)
+    workflow.add_node("knowledge_rag", knowledge_rag_node)
+    workflow.add_node("logic_solver", logic_solver_node)
+    workflow.add_node("direct_answer", direct_answer_node)
+
+    workflow.set_entry_point("router")
+
+    workflow.add_conditional_edges(
+        "router",
+        route_question,
+        {
+            "knowledge_rag": "knowledge_rag",
+            "logic_solver": "logic_solver",
+            "direct_answer": "direct_answer",
+            "__end__": END,
+        },
+    )
+
+    workflow.add_edge("knowledge_rag", END)
+    workflow.add_edge("logic_solver", END)
+    workflow.add_edge("direct_answer", END)
+    return workflow.compile()
+
+
+graph = None
+
+
+def get_graph():
+    """Get or create the compiled graph singleton."""
+    global graph
+    if graph is None:
+        graph = build_graph()
+    return graph
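The compiled graph can be driven directly, mirroring what api.py does per request; a minimal sketch (requires the API keys and vector store the nodes expect to be configured):

import asyncio

from src.data_processing.formatting import question_to_state
from src.data_processing.models import QuestionInput
from src.graph import get_graph

async def main() -> None:
    q = QuestionInput(qid="demo", question="1 + 1 bằng mấy?", choices=["1", "2", "3", "4"])
    result = await get_graph().ainvoke(question_to_state(q))
    print(result.get("route"), result.get("answer"))

asyncio.run(main())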
src/nodes/__init__.py ADDED
@@ -0,0 +1,15 @@
+"""Node implementations for the LangGraph pipeline."""
+
+from src.nodes.direct import direct_answer_node
+from src.nodes.logic import logic_solver_node
+from src.nodes.rag import knowledge_rag_node
+from src.nodes.router import route_question, router_node
+
+__all__ = [
+    "direct_answer_node",
+    "knowledge_rag_node",
+    "logic_solver_node",
+    "route_question",
+    "router_node",
+]
+
src/nodes/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (526 Bytes).

src/nodes/__pycache__/direct.cpython-312.pyc ADDED
Binary file (2.17 kB).

src/nodes/__pycache__/logic.cpython-312.pyc ADDED
Binary file (11.8 kB).

src/nodes/__pycache__/rag.cpython-312.pyc ADDED
Binary file (6.47 kB).

src/nodes/__pycache__/router.cpython-312.pyc ADDED
Binary file (5.9 kB).
 
src/nodes/direct.py ADDED
@@ -0,0 +1,42 @@
+"""Direct Answer node for reading comprehension or general questions without RAG."""
+
+from langchain_core.prompts import ChatPromptTemplate
+
+from src.data_processing.answer import extract_answer
+from src.data_processing.formatting import format_choices
+from src.state import GraphState
+from src.utils.llm import get_large_model
+from src.utils.logging import print_log
+from src.utils.prompts import load_prompt
+
+
+def direct_answer_node(state: GraphState) -> dict:
+    """Answer questions directly using Large Model (Skip Retrieval)."""
+    print_log(" [Direct] Processing Reading Comprehension/General Question...")
+
+    all_choices = state["all_choices"]
+    choices_text = format_choices(all_choices)
+
+    llm = get_large_model()
+
+    system_prompt = load_prompt("direct_answer.j2", "system")
+    user_prompt = load_prompt("direct_answer.j2", "user", question=state["question"], choices=choices_text)
+
+    # Escape curly braces to prevent LangChain from parsing them as variables
+    system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")
+    user_prompt = user_prompt.replace("{", "{{").replace("}", "}}")
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", system_prompt),
+        ("human", user_prompt),
+    ])
+
+    chain = prompt | llm
+    response = chain.invoke({})
+
+    content = response.content.strip()
+    print_log(f" [Direct] Reasoning: {content}...")
+
+    answer = extract_answer(content, num_choices=len(all_choices) or 4)
+    print_log(f" [Direct] Final Answer: {answer}")
+    return {"answer": answer, "raw_response": content}
src/nodes/logic.py ADDED
@@ -0,0 +1,253 @@
+"""Logic solver node implementing a Manual Code Execution workflow."""
+
+import re
+import string
+
+from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
+from langchain_experimental.utilities import PythonREPL
+
+from src.data_processing.answer import extract_answer
+from src.data_processing.formatting import format_choices
+from src.state import GraphState
+from src.utils.llm import get_large_model
+from src.utils.logging import print_log
+from src.utils.prompts import load_prompt
+
+_python_repl = PythonREPL()
+
+
+def extract_python_code(text: str) -> str | None:
+    """Find and extract Python code from a ```python ... ``` block."""
+    match = re.search(r"```(?:python)?\s*(.*?)```", text, re.DOTALL | re.IGNORECASE)
+    if match:
+        return match.group(1).strip()
+    return None
+
+
+def _validate_code_syntax(code: str) -> tuple[bool, str]:
+    """Check if code has valid Python syntax. Returns (is_valid, error_message)."""
+    try:
+        compile(code, "<string>", "exec")
+        return True, ""
+    except SyntaxError as e:
+        return False, str(e)
+
+
+def _is_placeholder_code(code: str) -> bool:
+    """Check if code contains placeholders or is incomplete."""
+    if not code or len(code.strip()) < 10:
+        return True
+    if "..." in code:
+        return True
+    # Check for {key}-style placeholders (but not f-string or dict literals)
+    if re.search(r"\{[a-zA-Z_][a-zA-Z0-9_]*\}", code):
+        # Exclude common dict/set patterns and f-strings
+        if not re.search(r'["\'][^"\']*\{[a-zA-Z_]', code):
+            return True
+    return False
+
+
+def _indent_code(code: str) -> str:
+    """Format code to make it easier to read in the terminal."""
+    return "\n".join(f"    {line}" for line in code.splitlines())
+
+
+def _fallback_text_reasoning(llm, question: str, choices_text: str) -> dict:
+    """Fallback to CoT reasoning when code execution fails."""
+    print_log(" [Logic] Falling back to CoT reasoning...")
+
+    fallback_system = (
+        "Nhiệm vụ của bạn là trả lời câu hỏi "
+        "được đưa ra bằng khả năng phân tích và suy luận logic. "
+        "Hãy phân tích vấn đề và suy luận từng bước một. "
+        "Cuối cùng, hãy trả lời theo đúng định dạng: 'Đáp án: X' "
+        "trong đó X là ký tự đại diện cho lựa chọn đúng (A, B, C, D, ...)."
+    )
+
+    fallback_user = (
+        f"Câu hỏi: {question}\n"
+        f"{choices_text}"
+    )
+
+    fallback_messages: list[BaseMessage] = [
+        SystemMessage(content=fallback_system),
+        HumanMessage(content=fallback_user)
+    ]
+
+    fallback_response = llm.invoke(fallback_messages)
+    fallback_content = fallback_response.content
+    print_log(" [Logic] Fallback response received.")
+
+    return {"text": fallback_content}
+
+
+def _request_final_answer(llm, question: str, choices_text: str, computed_results: str) -> str:
+    """Request a strict final answer from the model."""
+    system_prompt = (
+        "Bạn là trợ lý AI. Dựa vào kết quả tính toán được cung cấp, "
+        "hãy đưa ra đáp án cuối cùng. CHỈ trả lời đúng một dòng: Đáp án: X "
+        "(trong đó X là A, B, C hoặc D)."
+    )
+    user_prompt = (
+        f"Câu hỏi: {question}\n"
+        f"{choices_text}\n"
+        f"Kết quả tính toán: {computed_results}\n\n"
+        "Trả lời đúng một dòng: Đáp án: X"
+    )
+
+    messages: list[BaseMessage] = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=user_prompt)
+    ]
+
+    response = llm.invoke(messages)
+    return response.content
+
+
+def logic_solver_node(state: GraphState) -> dict:
+    """Solve math/logic questions using Python code execution."""
+    llm = get_large_model()
+    all_choices = state["all_choices"]
+    num_choices = len(all_choices)
+    choices_text = format_choices(all_choices)
+
+    system_prompt = load_prompt("logic_solver.j2", "system")
+    user_prompt = load_prompt("logic_solver.j2", "user", question=state["question"], choices=choices_text)
+
+    messages: list[BaseMessage] = [
+        SystemMessage(content=system_prompt),
+        HumanMessage(content=user_prompt)
+    ]
+
+    step_texts: list[str] = []
+    computed_outputs: list[str] = []
+
+    max_steps = 5
+    for step in range(max_steps):
+        response = llm.invoke(messages)
+        content = response.content
+        step_texts.append(content)
+        messages.append(response)
+
+        code_block = extract_python_code(content)
+
+        if code_block:
+            if _is_placeholder_code(code_block):
+                print_log(f" [Logic] Step {step+1}: Placeholder code detected. Requesting complete code...")
+                regen_msg = (
+                    "Code không hợp lệ (chứa placeholder hoặc không đầy đủ). "
+                    "Hãy cung cấp code Python hoàn chỉnh, có thể chạy được, không chứa '...' hay placeholder. "
+                    "In ra các giá trị tính toán được. "
+                    "Cuối cùng, kết thúc bằng một dòng duy nhất: Đáp án: X (X là A, B, C hoặc D)."
+                )
+                messages.append(HumanMessage(content=regen_msg))
+                continue
+
+            print_log(f" [Logic] Step {step+1}: Found Python code. Executing...")
+
+            # Validate syntax before execution
+            is_valid, syntax_error = _validate_code_syntax(code_block)
+            if not is_valid:
+                print_log(f" [Error] Syntax error detected: {syntax_error}")
+                error_msg = f"SyntaxError: {syntax_error}. "
+                error_msg += "Lưu ý: KHÔNG sử dụng các từ khóa Python như 'lambda', 'class', 'def' làm tên biến. "
+                error_msg += "Hãy đổi tên biến và thử lại."
+                messages.append(HumanMessage(content=error_msg))
+                continue
+
+            print_log(f" [Logic] Code:\n{_indent_code(code_block)}")
+
+            try:
+                if "print" not in code_block:
+                    lines = code_block.splitlines()
+                    if lines:
+                        last_line = lines[-1]
+                        if "=" in last_line:
+                            var_name = last_line.split("=")[0].strip()
+                        else:
+                            var_name = last_line.strip()
+                        code_block += f"\nprint({var_name})"
+
+                output = _python_repl.run(code_block)
+                output = output.strip() if output else "No output."
+                print_log(f" [Logic] Code output: {output}")
+                computed_outputs.append(output)
+
+                # Do NOT extract answer from code output directly
+                # Instead, feed output back to model and ask for final answer line
+                feedback_msg = (
+                    f"Kết quả thực thi code: {output}\n\n"
+                    "Dựa vào kết quả trên, hãy so sánh với các đáp án và đưa ra câu trả lời cuối cùng. "
+                    "Kết thúc bằng đúng một dòng: Đáp án: X (X là A, B, C hoặc D)."
+                )
+                messages.append(HumanMessage(content=feedback_msg))
+
+            except Exception as e:
+                error_msg = f"Error running code: {str(e)}"
+                print_log(f" [Error] {error_msg}")
+                messages.append(HumanMessage(content=f"{error_msg}. Hãy kiểm tra logic và sửa lại code."))
+
+            continue
+
+        # Check if current step contains an explicit answer (only at end of response)
+        step_answer = extract_answer(content, num_choices=num_choices, require_end=True)
+        if step_answer:
+            print_log(f" [Logic] Step {step+1}: Found explicit answer: {step_answer}")
+            combined_raw = "\n---STEP---\n".join(step_texts)
+            return {"answer": step_answer, "raw_response": combined_raw, "route": "math"}
+
+        # Also check if response contains clear conclusion without "Đáp án:" format
+        if any(phrase in content.lower() for phrase in ["kết luận", "vậy đáp án", "do đó", "vì vậy"]):
+            # Try to extract any single letter at end of response
+            lines = content.strip().split('\n')
+            for line in reversed(lines[-3:]):  # Check last 3 lines
+                line = line.strip()
+                if len(line) == 1 and line.upper() in string.ascii_uppercase[:num_choices]:
+                    print_log(f" [Logic] Step {step+1}: Found implicit answer: {line.upper()}")
+                    combined_raw = "\n---STEP---\n".join(step_texts)
+                    return {"answer": line.upper(), "raw_response": combined_raw, "route": "math"}
+
+        if step < max_steps - 1:
+            print_log(" [Warning] No code or answer found. Reminding model...")
+            messages.append(HumanMessage(content="Lưu ý: Bạn vẫn chưa đưa ra đáp án cuối cùng. Hãy kết thúc bằng: Đáp án: X"))
+
+    # Max steps reached - build combined_raw and try to extract answer
+    print_log(" [Warning] Max steps reached. Attempting answer extraction from combined text...")
+
+    # Build combined_raw from all steps
+    combined_raw = "\n---STEP---\n".join(step_texts) if step_texts else ""
+
+    # Try fallback text reasoning with error handling
+    try:
+        fallback_result = _fallback_text_reasoning(llm, state["question"], choices_text)
+        fallback_text = fallback_result["text"]
+        if fallback_text:
+            combined_raw += "\n---FALLBACK---\n" + fallback_text
+    except Exception as e:
+        print_log(f" [Error] Fallback reasoning failed: {e}")
+        fallback_text = ""
+
+    # Extract answer from the entire combined text (takes LAST explicit answer)
+    final_answer = extract_answer(combined_raw, num_choices=num_choices)
+
+    if final_answer:
+        print_log(f" [Logic] Extracted final answer from combined text: {final_answer}")
+        return {"answer": final_answer, "raw_response": combined_raw, "route": "math"}
+
+    # Still no answer - do one final strict LLM call with error handling
+    print_log(" [Logic] No explicit answer found. Requesting strict final answer...")
+    computed_str = "; ".join(computed_outputs) if computed_outputs else "Không có kết quả tính toán"
+    try:
+        strict_response = _request_final_answer(llm, state["question"], choices_text, computed_str)
+        combined_raw += "\n---FINAL---\n" + strict_response
+
+        final_answer = extract_answer(strict_response, num_choices=num_choices)
+        if final_answer:
+            print_log(f" [Logic] Final strict answer: {final_answer}")
+            return {"answer": final_answer, "raw_response": combined_raw, "route": "math"}
+    except Exception as e:
+        print_log(f" [Error] Final answer request failed: {e}")
+
+    # Absolute fallback - default to A
+    print_log(" [Warning] All extraction attempts failed. Defaulting to A.")
+    return {"answer": "A", "raw_response": combined_raw, "route": "math"}
src/nodes/rag.py ADDED
@@ -0,0 +1,141 @@
+"""RAG node for knowledge-based question answering with Retrieve & Rerank."""
+
+import re
+
+from langchain_core.prompts import ChatPromptTemplate
+
+from src.config import settings
+from src.data_processing.answer import extract_answer
+from src.data_processing.formatting import format_choices
+from src.state import GraphState
+from src.utils.ingestion import get_vector_store
+from src.utils.llm import get_small_model
+from src.utils.logging import print_log
+from src.utils.prompts import load_prompt
+from src.nodes.direct import direct_answer_node
+
+
+def _rerank_documents(query: str, docs: list, top_k: int = 3) -> list:
+    """Rerank retrieved documents using the small LLM.
+
+    Args:
+        query: The user question
+        docs: List of retrieved documents
+        top_k: Number of top documents to return after reranking
+
+    Returns:
+        List of reranked documents (top_k most relevant)
+    """
+    if len(docs) <= top_k:
+        return docs
+
+    llm = get_small_model()
+
+    # Build document list for reranking prompt
+    doc_list = ""
+    for i, doc in enumerate(docs):
+        content_preview = doc.page_content[:350].replace("\n", " ")
+        doc_list += f"[{i}] {content_preview}...\n\n"
+
+    rerank_system = (
+        "/no_think\n"
+        "Bạn là chuyên gia đánh giá độ liên quan của văn bản. "
+        "Nhiệm vụ: Chọn ra các đoạn văn bản LIÊN QUAN NHẤT với câu hỏi.\n"
+        "Chỉ trả về danh sách các số ID (ví dụ: 0, 3, 5), không giải thích."
+    )
+
+    rerank_user = (
+        f"Câu hỏi: {query}\n\n"
+        f"Các đoạn văn bản:\n{doc_list}\n"
+        f"Hãy chọn {top_k} đoạn văn bản LIÊN QUAN NHẤT với câu hỏi. "
+        f"Trả về danh sách ID (số từ 0 đến {len(docs)-1}), cách nhau bởi dấu phẩy."
+    )
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", rerank_system),
+        ("human", rerank_user),
+    ])
+
+    try:
+        chain = prompt | llm
+        response = chain.invoke({})
+        content = response.content.strip()
+        print_log(f" [RAG] Reranker response: {content}")
+
+        # Parse selected IDs from response
+        selected_ids = []
+        numbers = re.findall(r'\d+', content)
+        for num_str in numbers:
+            idx = int(num_str)
+            if 0 <= idx < len(docs) and idx not in selected_ids:
+                selected_ids.append(idx)
+            if len(selected_ids) >= top_k:
+                break
+
+        if selected_ids:
+            reranked = [docs[i] for i in selected_ids]
+            print_log(f" [RAG] Reranked: selected {len(reranked)} docs from {len(docs)}")
+            return reranked
+
+        print_log(" [RAG] Rerank parsing failed, using first top_k docs")
+        return docs[:top_k]
+
+    except Exception as e:
+        print_log(f" [RAG] Reranking failed: {e}. Falling back to first top_k docs.")
+        return docs[:top_k]
+
+
+def knowledge_rag_node(state: GraphState) -> dict:
+    """Retrieve relevant context, rerank, and answer knowledge-based questions."""
+    vector_store = get_vector_store()
+    query = state["question"]
+    print_log(f" [RAG] Retrieving context for: '{query}'")
+
+    docs = vector_store.similarity_search(query, k=settings.top_k_retrieval)
+    print_log(f" [RAG] Retrieved {len(docs)} documents")
+
+    if not docs:
+        print_log(" [Warning] No relevant documents found in Knowledge Base.")
+        context = ""
+    else:
+        reranked_docs = _rerank_documents(query, docs, top_k=settings.top_k_rerank)
+
+        context = "\n\n---\n\n".join([doc.page_content for doc in reranked_docs])
+
+        if reranked_docs:
+            print_log(f" [RAG] Using {len(reranked_docs)} reranked docs. Top: \"{reranked_docs[0].page_content[:80]}...\"")
+
+    all_choices = state["all_choices"]
+    choices_text = format_choices(all_choices)
+
+    llm = get_small_model()
+
+    system_prompt = load_prompt("rag.j2", "system", context=context)
+    user_prompt = load_prompt("rag.j2", "user", question=state["question"], choices=choices_text)
+
+    # Escape curly braces to prevent LangChain from parsing them as variables
+    system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")
+    user_prompt = user_prompt.replace("{", "{{").replace("}", "}}")
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", system_prompt),
+        ("human", user_prompt),
+    ])
+
+    chain = prompt | llm
+    response = chain.invoke({})
+    content = response.content.strip()
+    print_log(f" [RAG] Reasoning: {content}")
+
+    answer = extract_answer(content, num_choices=len(all_choices) or 4)
+    print_log(f" [RAG] Final Answer: {answer}")
+
+    # Fallback to direct mode if RAG context was not helpful
+    if answer is None:
+        print_log(" [RAG] Context not relevant, falling back to direct mode...")
+        direct_result = direct_answer_node(state)
+        direct_result["route"] = "rag->direct"  # Track the fallback
+        return direct_result
+
+    return {"answer": answer, "context": context, "raw_response": content}
src/nodes/router.py ADDED
@@ -0,0 +1,112 @@
+"""Router node for classifying questions and directing to appropriate handlers."""
+
+import string
+from typing import Literal
+
+from langchain_core.prompts import ChatPromptTemplate
+
+from src.data_processing.formatting import format_choices
+from src.state import GraphState
+from src.utils.llm import get_small_model
+from src.utils.logging import print_log
+from src.utils.prompts import load_prompt
+
+
+def _find_refusal_option(state: GraphState) -> str | None:
+    """Find refusal option in choices and return corresponding letter."""
+    all_choices = state["all_choices"]
+    option_labels = list(string.ascii_uppercase[:len(all_choices)])
+
+    refusal_patterns = [
+        "tôi không thể", "không thể trả lời", "không thể cung cấp", "không thể chia sẻ",
+        "từ chối trả lời", "từ chối cung cấp",
+        "nằm ngoài phạm vi", "không thuộc phạm vi", "tôi là mô hình ngôn ngữ",
+        "hành vi vi phạm", "trái pháp luật", "không hỗ trợ",
+    ]
+
+    for i, choice in enumerate(all_choices):
+        txt = choice.lower().strip()
+        if any(p in txt for p in refusal_patterns):
+            return option_labels[i]
+
+    return None
+
+
+def _classify_with_llm(state: GraphState) -> str:
+    """Classify question using LLM."""
+    choices_text = format_choices(state["all_choices"])
+    llm = get_small_model()
+
+    system_prompt = load_prompt("router.j2", "system")
+    user_prompt = load_prompt("router.j2", "user", question=state["question"], choices=choices_text)
+
+    # Escape curly braces to prevent LangChain from parsing them as variables
+    system_prompt = system_prompt.replace("{", "{{").replace("}", "}}")
+    user_prompt = user_prompt.replace("{", "{{").replace("}", "}}")
+
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", system_prompt),
+        ("human", user_prompt),
+    ])
+    chain = prompt | llm
+    response = chain.invoke({})
+    return response.content.strip().lower()
+
+
+def router_node(state: GraphState) -> dict:
+    """Analyze question and determine routing path. Returns answer immediately for toxic content."""
+    question = state["question"].lower()
+
+    # Fast-track: Direct answer for reading comprehension
+    direct_keywords = ["đoạn thông tin", "đoạn văn", "bài đọc", "căn cứ vào đoạn", "theo đoạn"]
+    if any(k in question for k in direct_keywords) and len(question.split()) > 50:
+        print_log(" [Router] Fast-track: Direct Answer (Found Context block)")
+        return {"route": "direct"}
+
+    # Fast-track: Math/Logic for LaTeX or math keywords
+    math_signals = [
+        "$", "\\frac", "^", "=", "tính giá trị", "biểu thức", "phương trình",
+        "hàm số", "đạo hàm", "xác suất", "lãi suất", "vận tốc", "gia tốc",
+        "điện trở", "gam", "mol", "nguyên tử khối", "gdp", "lạm phát", "công suất"
+    ]
+    if any(s in question for s in math_signals):
+        print_log(" [Router] Fast-track: Math (Keywords/LaTeX detected)")
+        return {"route": "math"}
+
+    print_log(" [Router] Slow-track: Using LLM to classify...")
+    try:
+        route = _classify_with_llm(state)
+        print_log(f" [Router] LLM Decision: {route}")
+
+        if "direct" in route:
+            route_type = "direct"
+        elif "math" in route or "logic" in route:
+            route_type = "math"
+        elif "toxic" in route:
+            refusal_answer = _find_refusal_option(state)
+            if refusal_answer:
+                print_log(f" [Router] Toxic detected, found refusal option: {refusal_answer}")
+                return {"route": "toxic", "answer": refusal_answer}
+            print_log(" [Router] Toxic detected, no refusal option found, defaulting to A")
+            return {"route": "toxic", "answer": "A"}
+        else:
+            route_type = "rag"
+
+        return {"route": route_type}
+    except Exception as e:
+        print_log(f" [Router] Error: {e}. Fallback to RAG.")
+        return {"route": "rag"}
+
+
+def route_question(state: GraphState) -> Literal["knowledge_rag", "logic_solver", "direct_answer", "__end__"]:
+    """Conditional edge function to route to appropriate node based on state route."""
+    route = state.get("route", "rag")
+    answer = state.get("answer")
+
+    if route == "toxic":
+        return "__end__"
+    if route == "direct":
+        return "direct_answer"
+    if route == "math":
+        return "logic_solver"
+    return "knowledge_rag"
src/pipeline.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ """Core pipeline execution logic for the RAG system."""
+
+ import asyncio
+ import csv
+ import sys
+ import time
+ from pathlib import Path
+
+ from src.config import BATCH_SIZE, DATA_OUTPUT_DIR
+ from src.data_processing.answer import normalize_answer
+ from src.data_processing.formatting import format_choices_display, question_to_state
+ from src.data_processing.models import InferenceLogEntry, PredictionOutput, QuestionInput
+ from src.graph import get_graph
+ from src.utils.checkpointing import (
+     append_log_entry,
+     consolidate_log_file,
+     generate_csv_from_log,
+     is_rate_limit_error,
+ )
+ from src.utils.common import sort_qids
+ from src.utils.ingestion import get_vector_store
+ from src.utils.logging import log_done, log_pipeline, log_stats, print_log
+
+
+ def sort_questions_by_qid(questions: list[QuestionInput]) -> list[QuestionInput]:
+     """Sort questions by qid using natural sorting."""
+     qid_to_question = {q.qid: q for q in questions}
+     sorted_qids = sort_qids(list(qid_to_question.keys()))
+     return [qid_to_question[qid] for qid in sorted_qids]
+
+
+ async def run_pipeline_async(
+     questions: list[QuestionInput],
+     batch_size: int = BATCH_SIZE,
+ ) -> list[PredictionOutput]:
+     """Run the inference pipeline (assumes a pre-built vector DB).
+
+     Args:
+         questions: List of questions to process.
+         batch_size: Number of questions to process concurrently.
+
+     Returns:
+         List of PredictionOutput objects sorted by qid.
+     """
+     log_pipeline("Loading pre-built vector store...")
+     get_vector_store()
+
+     questions = sort_questions_by_qid(questions)
+
+     graph = get_graph()
+     total = len(questions)
+     start_time = time.perf_counter()
+
+     # Bound concurrency: at most batch_size questions are in flight at once.
+     sem = asyncio.Semaphore(batch_size)
+     results: dict[str, PredictionOutput] = {}
+
+     async def process_single_question(q: QuestionInput) -> None:
+         async with sem:
+             print_log(f"\n[{q.qid}] {q.question}")
+             print_log(format_choices_display(q.choices))
+             state = question_to_state(q)
+             result = await graph.ainvoke(state)
+
+             answer = result.get("answer", "A")
+             route = result.get("route", "unknown")
+             num_choices = len(q.choices)
+
+             normalized_answer = normalize_answer(
+                 answer=answer,
+                 num_choices=num_choices,
+                 question_id=q.qid,
+                 default="A",
+             )
+
+             log_done(f"{q.qid}: {normalized_answer} (Route: {route})")
+             results[q.qid] = PredictionOutput(qid=q.qid, answer=normalized_answer)
+
+     tasks = [process_single_question(q) for q in questions]
+     await asyncio.gather(*tasks)
+
+     elapsed = time.perf_counter() - start_time
+     throughput = total / elapsed if elapsed > 0 else 0
+     log_stats(f"Completed {total} questions in {elapsed:.2f}s ({throughput:.2f} req/s)")
+
+     sorted_qids = sort_qids(list(results.keys()))
+     return [results[qid] for qid in sorted_qids]
+
+
+ async def run_pipeline_with_checkpointing(
+     questions: list[QuestionInput],
+     log_path: Path,
+     batch_size: int = BATCH_SIZE,
+ ) -> int:
+     """Run the pipeline with JSONL checkpointing for resume capability.
+
+     Questions are processed in qid order. Results are appended to the log
+     file immediately for fault tolerance, then consolidated at the end.
+
+     Args:
+         questions: Questions to process (already filtered to unprocessed ones).
+         log_path: Path to the JSONL log file used for checkpointing.
+         batch_size: Number of questions to process concurrently.
+
+     Returns:
+         Count of newly processed questions.
+     """
+     log_pipeline("Loading pre-built vector store...")
+     get_vector_store()
+
+     questions = sort_questions_by_qid(questions)
+     log_pipeline(f"Processing {len(questions)} questions in qid order...")
+
+     graph = get_graph()
+     total = len(questions)
+     start_time = time.perf_counter()
+     processed_count = 0
+
+     sem = asyncio.Semaphore(batch_size)
+     # Set once a rate limit is detected so in-flight workers exit early.
+     stop_event = asyncio.Event()
+
+     async def process_single_question(q: QuestionInput) -> None:
+         nonlocal processed_count
+         if stop_event.is_set():
+             return
+
+         async with sem:
+             if stop_event.is_set():
+                 return
+             print_log(f"\n[{q.qid}] {q.question}")
+             print_log(format_choices_display(q.choices))
+             state = question_to_state(q)
+
+             try:
+                 result = await graph.ainvoke(state)
+                 route = result.get("route", "unknown")
+                 raw_response = result.get("raw_response", "")
+                 context = result.get("context", "")
+
+                 answer = normalize_answer(
+                     answer=result.get("answer"),
+                     num_choices=len(q.choices),
+                     question_id=q.qid,
+                     default="A",
+                 )
+
+                 log_entry = InferenceLogEntry(
+                     qid=q.qid,
+                     question=q.question,
+                     choices=q.choices,
+                     final_answer=answer,
+                     raw_response=raw_response,
+                     route=route,
+                     retrieved_context=context,
+                 )
+                 await append_log_entry(log_path, log_entry)
+
+                 log_done(f"{q.qid}: {answer} (Route: {route})")
+                 processed_count += 1
+
+             except Exception as e:
+                 if is_rate_limit_error(e):
+                     print_log(f" [CRITICAL] Rate Limit Detected on {q.qid}: {e}")
+                     stop_event.set()
+                 else:
+                     print_log(f" [Error] Failed to process {q.qid}: {e}")
+
+     tasks = [asyncio.create_task(process_single_question(q)) for q in questions]
+     await asyncio.gather(*tasks)
+
+     if stop_event.is_set():
+         log_pipeline("!!! PIPELINE STOPPED DUE TO RATE LIMIT !!!")
+         log_pipeline("Consolidating logs and generating emergency submission...")
+         consolidate_log_file(log_path)
+
+         output_file = DATA_OUTPUT_DIR / "submission_emergency.csv"
+         total_entries = generate_csv_from_log(log_path, output_file)
+         log_pipeline(f"Saved emergency submission with {total_entries} entries to: {output_file}")
+
+         sys.exit(0)
+
+     log_pipeline("Consolidating log file...")
+     consolidate_log_file(log_path)
+
+     elapsed = time.perf_counter() - start_time
+     throughput = total / elapsed if elapsed > 0 else 0
+     log_stats(f"Processed {processed_count}/{total} questions in {elapsed:.2f}s ({throughput:.2f} req/s)")
+
+     return processed_count
+
+
+ def save_predictions(
+     predictions: list[PredictionOutput],
+     output_path: Path,
+     ensure_dir: bool = True,
+ ) -> None:
+     """Save predictions to a CSV file, sorted by qid.
+
+     Args:
+         predictions: List of prediction outputs.
+         output_path: Path to the output CSV file.
+         ensure_dir: If True, create the parent directory if it doesn't exist.
+     """
+     if ensure_dir:
+         output_path.parent.mkdir(parents=True, exist_ok=True)
+
+     sorted_qids = sort_qids([p.qid for p in predictions])
+     pred_dict = {p.qid: p for p in predictions}
+
+     with open(output_path, "w", newline="", encoding="utf-8") as f:
+         writer = csv.DictWriter(f, fieldnames=["qid", "answer"])
+         writer.writeheader()
+         for qid in sorted_qids:
+             writer.writerow({"qid": qid, "answer": pred_dict[qid].answer})
+     log_pipeline(f"Predictions saved to: {output_path}")
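For orientation, here is a minimal driver sketch for the two public entry points above. It assumes `QuestionInput` accepts `qid`/`question`/`choices` as keyword arguments (only attribute access appears in this diff) and that the vector store has already been ingested; the output path is illustrative.

```python
# Hedged sketch: drive run_pipeline_async end to end.
import asyncio
from pathlib import Path

from src.data_processing.models import QuestionInput
from src.pipeline import run_pipeline_async, save_predictions

questions = [
    QuestionInput(  # assumed keyword constructor; fields match usage in pipeline.py
        qid="q001",
        question="Thủ đô của Việt Nam là thành phố nào?",
        choices=["Hà Nội", "Đà Nẵng", "Huế", "Cần Thơ"],
    ),
]

# Process up to 4 questions concurrently, then write the submission CSV.
predictions = asyncio.run(run_pipeline_async(questions, batch_size=4))
save_predictions(predictions, Path("data/output/submission.csv"))  # illustrative path
```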
src/state.py ADDED
@@ -0,0 +1,16 @@
+ """State schema definitions for the RAG pipeline graph."""
+
+ from typing import TypedDict
+
+
+ class GraphState(TypedDict, total=False):
+     """State schema for the RAG pipeline graph."""
+
+     question_id: str
+     question: str
+     all_choices: list[str]
+     route: str
+     context: str
+     answer: str
+     raw_response: str
+
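Because `GraphState` is declared with `total=False`, every key is optional, so each graph node can return only the fields it actually produces. A small sketch:

```python
from src.state import GraphState

# Initial state built from a question (all keys optional under total=False).
initial: GraphState = {
    "question_id": "q001",
    "question": "2 + 3 * 4 = ?",
    "all_choices": ["A. 12", "B. 14", "C. 20", "D. 24"],
}

# A node may return a partial update; e.g. a router could contribute only:
router_update: GraphState = {"route": "math"}
```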
src/templates/direct_answer.j2 ADDED
@@ -0,0 +1,19 @@
+ {# Direct Answer Node Prompt Templates #}
+ {% block system %}
+ /no_think
+ Bạn là một chuyên gia trả lời câu hỏi trắc nghiệm. Nhiệm vụ của bạn là phân tích và chọn đáp án đúng nhất cho câu hỏi.
+
+ NGÔN NGỮ: Toàn bộ suy luận, giải thích PHẢI bằng TIẾNG VIỆT 100%. KHÔNG dùng tiếng Anh.
+
+ Lưu ý:
+ 1. Nếu đề bài có đoạn văn, CHỈ dựa vào đoạn văn đó để suy luận.
+ 2. Suy luận từng bước logic.
+    - Với câu hỏi về ngày tháng, con số: So sánh chính xác từng ký tự.
+    - Nếu câu hỏi yêu cầu tìm từ sai/đúng: Đối chiếu từng phương án với văn bản.
+ 3. Trả lời bằng: "Đáp án: X" (X là một trong các lựa chọn A, B, C, D, ...).
+ {% endblock %}
+
+ {% block user %}
+ Câu hỏi: {{ question }}
+ {{ choices }}
+ {% endblock %}
src/templates/logic_solver.j2 ADDED
@@ -0,0 +1,37 @@
+ {# Logic Solver (Code Agent) Prompt Templates #}
+ {% block system %}
+ /no_think
+ Bạn là chuyên gia giải toán và logic. Trả lời NGẮN GỌN, SÚC TÍCH.
+
+ NGÔN NGỮ: Toàn bộ suy luận, giải thích PHẢI bằng TIẾNG VIỆT 100%. KHÔNG dùng tiếng Anh.
+
+ QUY TẮC:
+ 1. Suy luận ngắn gọn, đi thẳng vào vấn đề
+ 2. Chỉ nêu các bước quan trọng, bỏ qua chi tiết thừa
+ 3. Dòng "Đáp án: X" PHẢI là dòng CUỐI CÙNG
+ 4. Tối đa 5-7 dòng suy luận
+
+ CẤU TRÚC:
+ 1. Phân tích ngắn gọn
+ 2. Suy luận chính
+ 3. Kết luận
+ 4. Đáp án: X
+
+ VÍ DỤ TỐT (NGẮN GỌN):
+ ```
+ Phân tích: Tính 2 + 3 * 4
+ Thứ tự: 3 * 4 = 12, sau đó 2 + 12 = 14
+ Kết luận: 14 tương ứng đáp án B
+
+ Đáp án: B
+ ```
+
+ NHẮC LẠI: NGẮN GỌN, SÚC TÍCH! Chỉ 5-7 dòng! TIẾNG VIỆT 100%!
+ {% endblock %}
+
+ {% block user %}
+ {{ question }}
+ {{ choices }}
+
+ Suy luận ngắn gọn:
+ {% endblock %}
src/templates/rag.j2 ADDED
@@ -0,0 +1,25 @@
+ {# RAG Node Prompt Templates #}
+ {% block system %}
+ /no_think
+ Bạn là một chuyên gia phân tích thông tin và đọc hiểu văn bản chính xác tuyệt đối.
+ Nhiệm vụ: Trả lời câu hỏi trắc nghiệm CHỈ dựa trên thông tin trong phần Văn bản được cung cấp bên dưới.
+
+ NGÔN NGỮ: Toàn bộ suy luận, giải thích PHẢI bằng TIẾNG VIỆT 100%. KHÔNG dùng tiếng Anh.
+
+ Văn bản:
+ {{ context }}
+
+ Quy tắc bắt buộc:
+ 1. Đọc kỹ văn bản để tìm các từ khóa liên quan đến câu hỏi.
+ 2. So sánh từng lựa chọn với thông tin tìm được trong văn bản.
+ 3. Suy luận từng bước:
+    - Nếu văn bản chứa câu trả lời trực tiếp: Trích dẫn ý đó để xác nhận.
+    - Nếu văn bản KHÔNG chứa câu trả lời trực tiếp: Sử dụng phương pháp loại trừ các đáp án sai để chọn đáp án phù hợp và
+      đúng nhất.
+ 4. Trả lời cuối cùng theo định dạng: "Đáp án: X" (trong đó X là ký tự lựa chọn). Ví dụ: "Đáp án: A"
+ {% endblock %}
+
+ {% block user %}
+ Câu hỏi: {{ question }}
+ {{ choices }}
+ {% endblock %}
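All three answer-producing templates above require the model to finish with a literal `Đáp án: X` line. The project's real parsing lives in `normalize_answer`, which is not shown in this part of the diff; a hypothetical extractor for that output contract could look like:

```python
import re

def extract_choice(raw_response: str) -> str | None:
    """Return the letter from the last 'Đáp án: X' occurrence, if any."""
    matches = re.findall(r"Đáp án:\s*([A-Z])", raw_response)
    return matches[-1] if matches else None

# Mirrors the worked example in logic_solver.j2:
assert extract_choice("Kết luận: 14 tương ứng đáp án B\n\nĐáp án: B") == "B"
```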
src/templates/router.j2 ADDED
@@ -0,0 +1,43 @@
+ {# Router Node Prompt Templates #}
+ {% block system %}
+ /no_think
+ Bạn là hệ thống phân loại câu hỏi tiếng Việt. LUÔN trả lời bằng TIẾNG VIỆT.
+
+ Nhiệm vụ: Phân loại câu hỏi vào duy nhất 1 trong 4 nhóm: "toxic", "direct", "math", hoặc "rag".
+
+ Hãy thực hiện theo quy trình kiểm tra thứ tự ưu tiên sau đây (QUAN TRỌNG):
+
+ Ưu tiên 1: Kiểm tra "toxic" (An toàn là trên hết)
+ - Nếu câu hỏi yêu cầu hướng dẫn thực hiện hành vi vi phạm pháp luật (trốn thuế, làm giả giấy tờ, tham nhũng, buôn lậu,
+   chế tạo vũ khí...).
+ - Nếu câu hỏi mang tính chất phản động, chống phá nhà nước, bôi nhọ lãnh tụ, hoặc vi phạm thuần phong mỹ tục.
+ -> Trả về: toxic
+
+ Ưu tiên 2: Kiểm tra "direct" (Đọc hiểu văn bản có sẵn)
+ - Hãy nhìn vào dữ liệu đầu vào. Nếu nó chứa các từ khóa đánh dấu văn bản như: "Đoạn thông tin:", "Văn bản:", "Document",
+   "Title:", "Nội dung:", hoặc một đoạn văn dài đi kèm trước câu hỏi.
+ - Bất kể nội dung là Lịch sử hay Khoa học, nếu ĐÃ CÓ đoạn văn bản đi kèm để trả lời -> Phải chọn nhóm này.
+ -> Trả về: direct
+
+ Ưu tiên 3: Kiểm tra "math" (Tư duy logic & Tính toán & Lập trình)
+ - Các bài tập Toán, Lý, Hóa, Sinh yêu cầu tính toán ra số liệu cụ thể (không phải lý thuyết suông).
+ - Các câu hỏi chứa công thức toán học (LaTeX, dấu $, phương trình).
+ - Các câu hỏi về Lập trình.
+ - Các câu hỏi tư duy logic, chuỗi số, xác suất thống kê.
+ -> Trả về: math
+
+ Ưu tiên 4: Kiểm tra "rag" (Tra cứu kiến thức)
+ - Các câu hỏi kiến thức về Lịch sử, Địa lý, Luật pháp, Văn hóa, Xã hội.
+ - Các câu hỏi lý thuyết khoa học (không cần tính toán).
+ - Câu hỏi mà KHÔNG CÓ đoạn văn bản đi kèm.
+ -> Trả về: rag
+
+ QUAN TRỌNG: Chỉ trả về đúng 1 từ kết quả (toxic/direct/math/rag). Không giải thích thêm.
+ {% endblock %}
+
+ {% block user %}
+ Câu hỏi: {{ question }}
+ {{ choices }}
+
+ Kết quả phân loại (chỉ 1 từ):
+ {% endblock %}
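Each template splits its prompt into `{% block system %}` and `{% block user %}`. The loader the nodes actually use is not part of this diff; one plausible way to render the two blocks separately with stock Jinja2 is:

```python
# Hedged sketch: render a single named block from a template.
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("src/templates"))

def render_block(template_name: str, block_name: str, **ctx) -> str:
    """template.blocks maps block names to generator functions that
    take a render context and yield string chunks."""
    template = env.get_template(template_name)
    block_fn = template.blocks[block_name]
    return "".join(block_fn(template.new_context(ctx))).strip()

system_prompt = render_block("router.j2", "system")
user_prompt = render_block(
    "router.j2", "user",
    question="1 + 1 bằng mấy?",
    choices="A. 1\nB. 2\nC. 3\nD. 4",
)
```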
src/utils/__init__.py ADDED
@@ -0,0 +1,47 @@
+ """Utility functions for the RAG pipeline."""
+
+ from src.utils.checkpointing import (
+     append_log_entry,
+     consolidate_log_file,
+     generate_csv_from_log,
+     is_rate_limit_error,
+     load_log_entries,
+     load_processed_qids,
+ )
+ from src.utils.common import normalize_text, remove_diacritics, sort_qids
+ from src.utils.ingestion import (
+     get_embeddings,
+     get_qdrant_client,
+     get_vector_store,
+     ingest_all_data,
+     ingest_files,
+ )
+ from src.utils.llm import get_large_model, get_small_model
+ from src.utils.web_crawler import WebCrawler, crawl_website, save_crawled_data
+
+ __all__ = [
+     # Checkpointing
+     "load_processed_qids",
+     "load_log_entries",
+     "append_log_entry",
+     "consolidate_log_file",
+     "generate_csv_from_log",
+     "is_rate_limit_error",
+     "sort_qids",
+     # Ingestion
+     "get_embeddings",
+     "get_qdrant_client",
+     "get_vector_store",
+     "ingest_all_data",
+     "ingest_files",
+     # LLM
+     "get_small_model",
+     "get_large_model",
+     # Text utilities
+     "normalize_text",
+     "remove_diacritics",
+     # Web crawler
+     "WebCrawler",
+     "crawl_website",
+     "save_crawled_data",
+ ]
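These exports make the resume flow behind `run_pipeline_with_checkpointing` easy to wire up. A sketch, assuming `load_processed_qids(log_path)` returns the set of qids already present in the JSONL log (its exact signature is not shown in this diff):

```python
import asyncio
from pathlib import Path

from src.pipeline import run_pipeline_with_checkpointing
from src.utils import load_processed_qids

log_path = Path("data/output/inference_log.jsonl")  # illustrative path

all_questions = []  # replace with the output of the project's question loader
done = load_processed_qids(log_path)  # assumed: set[str] of completed qids
pending = [q for q in all_questions if q.qid not in done]

new_count = asyncio.run(run_pipeline_with_checkpointing(pending, log_path))
print(f"Newly processed: {new_count}")
```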
src/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.03 kB)
src/utils/__pycache__/__init__.cpython-314.pyc ADDED
Binary file (1.03 kB)
src/utils/__pycache__/checkpointing.cpython-312.pyc ADDED
Binary file (5.38 kB)
src/utils/__pycache__/checkpointing.cpython-314.pyc ADDED
Binary file (6.5 kB)
src/utils/__pycache__/common.cpython-312.pyc ADDED
Binary file (3.31 kB)