import asyncio
from concurrent.futures import ThreadPoolExecutor
from fastapi import FastAPI, status
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field

# ===========================================================
# 1. IMPORT LÕI AI & RAG TỪ MODEL_MAIN
# ===========================================================
import traceback

# Bootstrap of the AI / RAG core. Any failure while importing or wiring the
# core is caught so that the web server can still start; SYSTEM_READY records
# whether the bootstrap succeeded.
try:
    # Classes/functions plus the shared qdrant_manager and embedding_model
    # objects are all taken from model_main (assumed to be created there at
    # module level -- verify against model_main).
    from model_main import (
        ContextRetriever,
        get_ai_grade,
        grade_batch,
        qdrant_manager,
        embedding_model,
    )

    # Build the RAG context retriever used by the endpoints.
    retriever = ContextRetriever(
        qdrant=qdrant_manager,
        embedding=embedding_model,
    )
    SYSTEM_READY = True
    print("🏆 [RAG-SERVER] Khởi tạo lõi RAG và LLM Judge thành công!")
except Exception:
    # Keep the process alive; just mark the core as unavailable and dump the
    # full traceback for diagnosis.
    SYSTEM_READY = False
    print("⚠️ [CẢNH BÁO] HỆ THỐNG LÕI ĐÃ SẬP! NGUYÊN NHÂN CHÍNH XÁC LÀ:")
    traceback.print_exc()

# ===========================================================
# 2. CẤU HÌNH FASTAPI SERVER
# ===========================================================
app = FastAPI(
    title="AI Essay Judge Server (RAG + Qwen-72B)",
    description="Server nhận bài văn, tự động tìm tài liệu uy tín trên GDrive/Qdrant và chấm điểm.",
    version="2.0.0"
)

# Enable CORS so that front-ends or other apps on any origin can call the API.
# Credentials are disabled on purpose: a wildcard origin must not be combined
# with allow_credentials=True (the FastAPI CORS docs require explicit origins
# in that case). The endpoints in this file do not read cookies or auth
# headers. If credentialed requests are ever needed, list the allowed origins
# explicitly and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Thread pool used to run the synchronous AI functions without blocking the
# async server.
executor = ThreadPoolExecutor(max_workers=10)

# ===========================================================
# 3. ĐỊNH NGHĨA PYDANTIC SCHEMA (INPUT VÀO)
# ===========================================================
class EssayInput(BaseModel):
    # Request body describing a single essay to grade.

    # Essay title or prompt; falls back to a placeholder when omitted.
    title: str = Field(
        "Chưa có tiêu đề",
        description="Tiêu đề bài văn hoặc đề bài",
    )
    # Essay text to grade; required, with a minimum length of 20 characters.
    content: str = Field(
        ...,
        min_length=20,
        description="Nội dung bài văn của học sinh cần chấm",
    )

class BatchEssayInput(BaseModel):
    # Request body carrying several essays to grade in one call.

    # Required list of essays; each item is validated as an EssayInput.
    essays: list[EssayInput] = Field(
        ...,
        description="Danh sách nhiều bài văn cần chấm cùng lúc",
    )


# ===========================================================
# 4. API ENDPOINTS
# ===========================================================

@app.get("/health")
def health_check():
    """Report whether the server's AI core was initialised successfully."""
    status_label = "unhealthy"
    if SYSTEM_READY:
        status_label = "healthy"

    return dict(
        status=status_label,
        mode="RAG_LLM_JUDGE_INTEGRATED",
        ready=SYSTEM_READY,
    )

@app.post("/api/v1/judge")
async def judge_single_essay(payload: EssayInput):
    """
    Main endpoint: grade one essay.

    Flow: look up reference context for the essay with the RAG retriever,
    then hand the essay plus that context to the LLM judge.

    Args:
        payload: Validated request body. ``content`` is the essay text;
            ``title`` is only used in the log line.

    Returns:
        JSONResponse:
            - 503 when the AI core failed to initialise.
            - 200 ``{"status": "success", "data": ...}`` with the judge output.
            - 200 ``{"status": "fallback", "data": ...}`` with a zero-score
              placeholder when the error text contains ``response_format``,
              ``BadRequestError`` or ``Chưa cấu hình API key``.
            - 500 ``{"error": ..., "detail": ...}`` for any other failure.
    """
    if not SYSTEM_READY:
        return JSONResponse(
            status_code=503,
            content={"error": "Hệ thống chưa sẵn sàng do lỗi cấu hình lõi AI."}
        )

    try:
        loop = asyncio.get_running_loop()
        bai_van = payload.content

        # Step 1: use RAG to find reference material for this essay.
        # Runs in the executor so the blocking call does not stall the event loop.
        print(f"🔍 [RAG] Đang tìm kiếm tài liệu chuẩn cho bài viết: '{payload.title}'...")
        tai_lieu_chuan = await loop.run_in_executor(
            # 3 is the second positional argument of search_context (the batch
            # endpoint passes the same value as limit=3).
            executor, retriever.search_context, bai_van, 3
        )

        # No usable context: fall back to an instruction telling the judge to
        # grade from general knowledge.
        if not tai_lieu_chuan or not tai_lieu_chuan.strip():
            print("⚠️ [RAG] Không tìm thấy tài liệu phù hợp! Hệ thống sẽ chuyển sang chấm tự do.")
            tai_lieu_chuan = "Không có tài liệu đáp án cụ thể. Hãy chấm điểm dựa trên kiến thức phổ thông chuẩn."

        # Step 2: send the essay together with the reference material to the judge.
        print("🤖 [JUDGE] Đang gửi dữ liệu sang Qwen-72B để chấm điểm và export JSON...")
        ket_qua_json = await loop.run_in_executor(
            executor, get_ai_grade, bai_van, tai_lieu_chuan
        )

        # Return the judge output to the client unchanged.
        return JSONResponse(status_code=200, content={"status": "success", "data": ket_qua_json})

    except Exception as e:
        # The error is converted into a response below, so log the traceback
        # here; otherwise the failure would leave no trace on the server.
        traceback.print_exc()

        err_str = str(e)
        # For LLM/provider related errors return a temporary result instead of a 500.
        llm_error_markers = ('response_format', 'BadRequestError', 'Chưa cấu hình API key')
        if any(marker in err_str for marker in llm_error_markers):
            fallback = {
                "diem": 0.0,
                "xep_loai": "Không chấm được",
                "nhan_xet_chung": f"Lỗi LLM: {err_str}",
                "uu_diem": [],
                "nhuoc_diem": [],
                "chi_tiet_diem": {"noi_dung": 0.0, "hinh_thuc": 0.0, "sang_tao": 0.0},
                "ket_luan": "Kết quả tạm thời do lỗi hệ thống LLM."
            }
            return JSONResponse(status_code=200, content={"status": "fallback", "data": fallback})

        return JSONResponse(
            status_code=500,
            content={"error": "Lỗi xử lý chấm điểm nội bộ", "detail": err_str}
        )

@app.post("/api/v1/judge-batch")
async def judge_multiple_essays(payload: BatchEssayInput):
    """
    Batch endpoint: grade several essays in one request.

    RAG lookups for all essays are submitted to the executor concurrently;
    the collected contexts are then passed, together with the essays, to
    ``grade_batch`` in a single executor call.

    Args:
        payload: Validated request body holding the list of essays.

    Returns:
        JSONResponse:
            - 503 when the AI core failed to initialise.
            - 200 ``{"results": [...]}`` with the output of ``grade_batch``.
            - 200 ``{"results": [...]}`` with one zero-score placeholder per
              essay when the error text contains ``response_format`` or
              ``BadRequestError``.
            - 500 ``{"error": ..., "detail": ...}`` for any other failure.
    """
    if not SYSTEM_READY:
        return JSONResponse(status_code=503, content={"error": "Hệ thống chưa sẵn sàng."})

    # Built before the try block so the error handler can always size its
    # fallback list, no matter where the failure happens.
    bai_van_list = [essay.content for essay in payload.essays]

    try:
        loop = asyncio.get_running_loop()

        print(f"📦 [BATCH] Đang chuẩn bị trích xuất RAG song song cho {len(payload.essays)} bài văn...")

        # Small wrapper so the keyword argument can go through run_in_executor,
        # which only forwards positional arguments.
        def run_rag(content):
            return retriever.search_context(content, limit=3)

        # Kick off the RAG lookups for every essay and wait for all of them.
        rag_tasks = [loop.run_in_executor(executor, run_rag, content) for content in bai_van_list]
        ket_qua_rag = await asyncio.gather(*rag_tasks)

        # Same rule as the single-essay endpoint: an empty or blank context is
        # replaced by an instruction to grade from general knowledge.
        def is_blank(context):
            return not context or (isinstance(context, str) and not context.strip())

        tai_lieu_mac_dinh = "Không có tài liệu đáp án cụ thể. Hãy chấm điểm dựa trên kiến thức phổ thông chuẩn."
        so_bai_thieu = sum(1 for context in ket_qua_rag if is_blank(context))
        if so_bai_thieu:
            print(f"⚠️ [RAG] Không tìm thấy tài liệu phù hợp cho {so_bai_thieu} bài văn! Hệ thống sẽ chuyển sang chấm tự do.")
        tai_lieu_chuan_list = [
            tai_lieu_mac_dinh if is_blank(context) else context
            for context in ket_qua_rag
        ]

        print("🤖 [BATCH] Kích hoạt hàm grade_batch() chấm tuần tự để tránh nghẽn/rate limit API...")
        list_ket_qua = await loop.run_in_executor(
            executor, grade_batch, bai_van_list, tai_lieu_chuan_list
        )

        return JSONResponse(status_code=200, content={"results": list_ket_qua})

    except Exception as e:
        # The error is converted into a response below, so log the traceback
        # here; otherwise the failure would leave no trace on the server.
        traceback.print_exc()

        err_str = str(e)
        if 'response_format' in err_str or 'BadRequestError' in err_str:
            # One placeholder result per submitted essay.
            list_ket_qua = [
                {
                    "diem": 0.0,
                    "xep_loai": "Không chấm được",
                    "nhan_xet_chung": f"Lỗi LLM: {err_str}",
                    "uu_diem": [],
                    "nhuoc_diem": [],
                    "chi_tiet_diem": {"noi_dung": 0.0, "hinh_thuc": 0.0, "sang_tao": 0.0},
                    "ket_luan": "Kết quả tạm thời do lỗi hệ thống LLM."
                }
                for _ in bai_van_list
            ]
            return JSONResponse(status_code=200, content={"results": list_ket_qua})

        return JSONResponse(status_code=500, content={"error": "Lỗi xử lý Batch nội bộ", "detail": err_str})


# ===========================================================
# 5. KHỞI CHẠY SERVER LOCAL
# ===========================================================
if __name__ == "__main__":
    import uvicorn

    # Start the local server by running this file directly with Python.
    # The app object is passed instead of the "main:app" import string: the
    # string form makes uvicorn import this file a second time as module
    # `main`, which repeats the top-level core initialisation and only works
    # when the file is named main.py. An app instance is supported here
    # because reload is disabled.
    uvicorn.run(app, host="0.0.0.0", port=8000, reload=False)