| | from fastapi import FastAPI, HTTPException |
| | import time |
| | from typing import List, Dict |
| | import logging |
| |
|
| | |
| | from .budget_packer import enhanced_greedy_pack |
| | from .cross_encoder import SafeCrossEncoderManager |
| | from .capsule_logger import ExplainCapsuleLogger |
| |
|
| | |
| |
|
# FastAPI application instance exposing the CRoM reranking/packing API.
app = FastAPI(
    title="CRoM-EfficientLLM Server",
    description="Context Reranking and Management for Efficient LLMs",
    version="1.0.1"
)

# Root logger configured at INFO; handlers below log through the logging module.
logging.basicConfig(level=logging.INFO)

# Module-level singletons shared by all endpoints.
# NOTE(review): assumes SafeCrossEncoderManager handles model-load failures
# internally ("Safe" prefix) — confirm in the cross_encoder module.
ce_manager = SafeCrossEncoderManager(model_name="ms-marco-TinyBERT-L-2-v2")
capsule_logger = ExplainCapsuleLogger(log_directory="artifacts/logs")
| |
|
| |
|
| | |
| |
|
class ProcessResponseV2:
    """Helper for building the extended /process endpoint response schema."""

    @staticmethod
    def create_response(query: str, packed_chunks: List[Dict],
                        processing_stats: Dict, cross_encoder_status: str,
                        processing_time: float) -> Dict:
        """Build the enriched /process response payload.

        Args:
            query: The original user query.
            packed_chunks: Chunks selected by the packer, in packed order.
            processing_stats: Statistics dict produced by the packer.
            cross_encoder_status: Status string from the cross-encoder manager.
            processing_time: Wall-clock handler duration in seconds.

        Returns:
            A JSON-serializable dict with success flag, chunks, stats and meta.
        """
        meta = {
            "cross_encoder_status": cross_encoder_status,
            "processing_time_ms": processing_time * 1000,
            "timestamp": time.time(),
        }
        return {
            "success": True,
            "query": query,
            "chunks": packed_chunks,
            "stats": processing_stats,
            "meta": meta,
        }
| |
|
| | |
| |
|
| | @app.post("/process", summary="Rerank and pack text chunks") |
| | def process_chunks(query: str, chunks: List[Dict], budget: int = 4096): |
| | """ |
| | ์ฃผ์ด์ง ์ฟผ๋ฆฌ์ ์ฒญํฌ ๋ชฉ๋ก์ ๋ฆฌ๋ญํนํ๊ณ ์์ฐ์ ๋ง๊ฒ ํจํนํฉ๋๋ค. |
| | """ |
| | start_time = time.time() |
| |
|
| | try: |
| | |
| | doc_texts = [chunk.get("text", "") for chunk in chunks] |
| | scores = ce_manager.rerank(query, doc_texts) |
| | for chunk, score in zip(chunks, scores): |
| | chunk["score"] = score |
| |
|
| | |
| | packed_chunks, stats = enhanced_greedy_pack(chunks, budget=budget, score_key="score") |
| |
|
| | |
| | processing_time = time.time() - start_time |
| | response_data = ProcessResponseV2.create_response( |
| | query=query, |
| | packed_chunks=packed_chunks, |
| | processing_stats=stats, |
| | cross_encoder_status=ce_manager.get_status_for_response(), |
| | processing_time=processing_time |
| | ) |
| |
|
| | |
| | capsule = capsule_logger.create_explain_capsule( |
| | query=query, |
| | response_data=response_data, |
| | processing_stats=stats, |
| | cross_encoder_status=ce_manager.get_status_for_response() |
| | ) |
| | capsule_logger.log_capsule(capsule) |
| |
|
| | return response_data |
| |
|
| | except Exception as e: |
| | logging.error(f"Error during /process: {e}", exc_info=True) |
| | |
| | capsule_logger.log_error({ |
| | "endpoint": "/process", |
| | "error": str(e), |
| | "query": query, |
| | }) |
| | raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}") |
| |
|
| | @app.get("/healthz", summary="Health check") |
| | def health_check(): |
| | """์๋ฒ์ ์ํ๋ฅผ ํ์ธํฉ๋๋ค.""" |
| | return {"status": "ok", "cross_encoder": ce_manager.get_status_for_response()} |
| |
|
| | @app.get("/metrics", summary="Get Prometheus metrics") |
| | def get_metrics(): |
| | """Prometheus ๋ฉํธ๋ฆญ์ ๋
ธ์ถํฉ๋๋ค.""" |
| | |
| | return {"message": "Metrics endpoint is active. Implement with prometheus-client."} |
| |
|