pierreramez committed on
Commit
33b76bd
·
verified ·
1 Parent(s): 6b940c7

Fixed app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -73
app.py CHANGED
@@ -5,7 +5,7 @@ Enhanced FastAPI Backend with Feedback Management
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from pydantic import BaseModel
8
- from typing import List, Optional, Dict
9
  import json
10
  import time
11
  from pathlib import Path
@@ -28,52 +28,48 @@ app.add_middleware(
28
  allow_headers=["*"],
29
  )
30
 
 
31
 
32
  class ChatRequest(BaseModel):
33
  message: str
34
- history: Optional<List[Dict[str, str]]] = []
35
- max_length: Optional[int] = 200
36
- temperature: Optional[float] = 0.7
37
-
38
 
39
  class FeedbackRequest(BaseModel):
40
  user_input: str
41
  model_reply: str
42
  user_correction: str
43
- reason: Optional[str] = "user_correction"
44
-
45
 
46
  class ReloadAdapterRequest(BaseModel):
47
  adapter_path: str
48
 
49
-
50
  class ChatResponse(BaseModel):
51
  reply: str
52
  timestamp: float
53
 
54
-
55
  class FeedbackResponse(BaseModel):
56
  status: str
57
  message: str
58
 
59
-
60
  class StatsResponse(BaseModel):
61
  total_interactions: int
62
  corrections: int
63
  accepted: int
64
  correction_rate: float
65
 
66
-
67
  class CorrectionCountResponse(BaseModel):
68
  corrections: int
69
  total: int
70
  ready_to_train: bool
71
 
72
-
73
  class DownloadFeedbackResponse(BaseModel):
74
  content: str
75
  count: int
76
 
 
77
 
78
  class ModelManager:
79
  """Singleton model manager to load model once and reuse."""
@@ -121,7 +117,7 @@ class ModelManager:
121
  if self._tokenizer.pad_token is None:
122
  self._tokenizer.pad_token = self._tokenizer.eos_token
123
 
124
- # CRITICAL FIX: Only try 4-bit if we actually have a GPU
125
  if use_4bit and self._device == "cuda":
126
  print("🚀 GPU detected: Loading in 4-bit mode")
127
  try:
@@ -154,11 +150,10 @@ class ModelManager:
154
  model_name,
155
  device_map=self._device,
156
  trust_remote_code=True,
157
- # Use float32 for CPU stability
158
  torch_dtype=torch.float32 if self._device == "cpu" else torch.float16
159
  )
160
 
161
- if adapter_path and (isinstance(adapter_path, str) and adapter_path.strip()):
162
  print(f"Loading LoRA adapter: {adapter_path}")
163
  try:
164
  self._model = PeftModel.from_pretrained(
@@ -227,18 +222,17 @@ class ModelManager:
227
  skip_special_tokens=True
228
  ).strip()
229
 
230
- # Remove the system/user prompt if it leaked into response
231
  if "assistant" in reply.lower() and len(reply.split("assistant")) > 1:
232
  reply = reply.split("assistant")[-1].strip()
233
 
234
  return reply
235
 
 
236
 
237
  class FeedbackManager:
238
  """Manages feedback storage and statistics."""
239
  def __init__(self, feedback_file: str = "data/feedback.jsonl"):
240
  self.feedback_file = Path(feedback_file)
241
- # Ensure directory exists (Handled by Dockerfile too, but good safety)
242
  self.feedback_file.parent.mkdir(parents=True, exist_ok=True)
243
 
244
  def save_interaction(
@@ -298,10 +292,10 @@ class FeedbackManager:
298
  "correction_rate": correction_rate
299
  }
300
 
301
-
302
  model_manager = ModelManager()
303
  feedback_manager = FeedbackManager(feedback_file="data/feedback.jsonl")
304
 
 
305
 
306
  @app.on_event("startup")
307
  async def startup_event():
@@ -310,20 +304,17 @@ async def startup_event():
310
 
311
  model_manager.initialize(
312
  # 1. The Base Model (The heavy lifter)
313
- # We use the official Llama 3.2 3B Instruct as the foundation
314
  model_name="meta-llama/Llama-3.2-3B-Instruct",
315
 
316
- # 2. Adapter (The personalization)
317
- adapter_path="pierreramez/Llama-3.2-3B-Instruct-bnb-4bit_finetuned",
318
 
319
- # 3. CPU Optimization
320
- # Must be False for the free CPU tier
321
  use_4bit=False
322
  )
323
 
324
  print("Ready to serve!")
325
 
326
-
327
  @app.get("/")
328
  async def root():
329
  """Root endpoint"""
@@ -344,7 +335,6 @@ async def root():
344
  }
345
  }
346
 
347
-
348
  @app.get("/health")
349
  async def health_check():
350
  """Health check endpoint"""
@@ -355,7 +345,6 @@ async def health_check():
355
  "device": str(model_manager._device)
356
  }
357
 
358
-
359
  @app.post("/chat", response_model=ChatResponse)
360
  async def chat(request: ChatRequest):
361
  """Generate chatbot response."""
@@ -383,12 +372,10 @@ async def chat(request: ChatRequest):
383
  print(f"Error during chat: {e}")
384
  raise HTTPException(status_code=500, detail=str(e))
385
 
386
-
387
  @app.post("/feedback", response_model=FeedbackResponse)
388
  async def submit_feedback(request: FeedbackRequest):
389
  """Submit correction for a model response."""
390
  try:
391
- # Optimistic feedback update: try to find existing entry
392
  if feedback_manager.feedback_file.exists():
393
  with open(feedback_manager.feedback_file, "r", encoding="utf-8") as f:
394
  lines = f.readlines()
@@ -396,7 +383,6 @@ async def submit_feedback(request: FeedbackRequest):
396
  lines = []
397
 
398
  found = False
399
- # Search backwards to find the most recent matching interaction
400
  for i in range(len(lines) - 1, -1, -1):
401
  try:
402
  record = json.loads(lines[i])
@@ -423,7 +409,6 @@ async def submit_feedback(request: FeedbackRequest):
423
  message="Feedback recorded successfully"
424
  )
425
  else:
426
- # If not found (e.g., app restarted), just append new record
427
  feedback_manager.save_interaction(
428
  user_input=request.user_input,
429
  model_reply=request.model_reply,
@@ -439,27 +424,20 @@ async def submit_feedback(request: FeedbackRequest):
439
  except Exception as e:
440
  raise HTTPException(status_code=500, detail=str(e))
441
 
442
-
443
  @app.get("/stats", response_model=StatsResponse)
444
  async def get_stats():
445
  """Get feedback statistics."""
446
  stats = feedback_manager.get_stats()
447
  return StatsResponse(**stats)
448
 
449
-
450
  @app.get("/correction-count", response_model=CorrectionCountResponse)
451
  async def get_correction_count():
452
- """Get count of corrections for training readiness monitoring."""
453
  if not feedback_manager.feedback_file.exists():
454
- return CorrectionCountResponse(
455
- corrections=0,
456
- total=0,
457
- ready_to_train=False
458
- )
459
 
460
  total = 0
461
  corrections = 0
462
-
463
  with open(feedback_manager.feedback_file, "r", encoding="utf-8") as f:
464
  for line in f:
465
  try:
@@ -469,72 +447,48 @@ async def get_correction_count():
469
  corrections += 1
470
  except:
471
  pass
472
-
473
  return CorrectionCountResponse(
474
  corrections=corrections,
475
  total=total,
476
  ready_to_train=corrections >= 20
477
  )
478
 
479
-
480
  @app.get("/download-feedback", response_model=DownloadFeedbackResponse)
481
  async def download_feedback():
482
- """Download feedback file for training."""
483
  if not feedback_manager.feedback_file.exists():
484
- return DownloadFeedbackResponse(
485
- content="",
486
- count=0
487
- )
488
 
489
  with open(feedback_manager.feedback_file, 'r', encoding='utf-8') as f:
490
  content = f.read()
491
  count = len(content.strip().split('\n')) if content.strip() else 0
492
 
493
- return DownloadFeedbackResponse(
494
- content=content,
495
- count=count
496
- )
497
-
498
 
499
  @app.post("/clear-feedback")
500
  async def clear_feedback():
501
- """Clear feedback file after training."""
502
  try:
503
  if feedback_manager.feedback_file.exists():
504
  feedback_manager.feedback_file.unlink()
505
- return {
506
- "status": "success",
507
- "message": "Feedback file cleared"
508
- }
509
  else:
510
- return {
511
- "status": "success",
512
- "message": "Feedback file already empty"
513
- }
514
  except Exception as e:
515
  raise HTTPException(status_code=500, detail=str(e))
516
 
517
-
518
  @app.post("/reload-adapter")
519
  async def reload_adapter(request: ReloadAdapterRequest):
520
- """Hot reload model with new adapter."""
521
  try:
522
  model_manager.initialize(
523
  model_name="meta-llama/Llama-3.2-1B-Instruct",
524
  adapter_path=request.adapter_path,
525
- use_4bit=True
526
  )
527
- return {
528
- "status": "success",
529
- "adapter": request.adapter_path,
530
- "message": "Adapter reloaded successfully"
531
- }
532
  except Exception as e:
533
- raise HTTPException(
534
- status_code=500,
535
- detail=f"Failed to reload adapter: {str(e)}"
536
- )
537
-
538
 
539
  if __name__ == "__main__":
540
  import uvicorn
 
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from pydantic import BaseModel
8
+ from typing import Optional, List, Dict, Union
9
  import json
10
  import time
11
  from pathlib import Path
 
28
  allow_headers=["*"],
29
  )
30
 
31
+ # --- DATA MODELS (Fixed for Syntax Stability) ---
32
 
33
class ChatRequest(BaseModel):
    """Request payload for POST /chat."""

    # The user's message for this turn.
    message: str
    # Prior turns, presumably as role/content dicts — confirm against the
    # frontend caller. Pydantic deep-copies field defaults per instance,
    # so the mutable [] default is safe here.
    history: List[Dict[str, str]] = []
    # Generation knobs — presumably forwarded to the model's generate call;
    # the /chat handler body is not visible in this chunk, so confirm.
    max_length: int = 200
    temperature: float = 0.7
39
 
40
class FeedbackRequest(BaseModel):
    """Request payload for POST /feedback."""

    # Original prompt the user sent.
    user_input: str
    # Reply the model gave (the one being corrected).
    model_reply: str
    # The user's corrected version of the reply.
    user_correction: str
    # Tag stored with the feedback record; defaults to a plain correction.
    reason: str = "user_correction"
 
45
 
46
class ReloadAdapterRequest(BaseModel):
    """Request payload for POST /reload-adapter."""

    # LoRA adapter to load — handed to PeftModel.from_pretrained by the
    # model manager, so a Hub repo id or a local path both work.
    adapter_path: str
48
 
 
49
class ChatResponse(BaseModel):
    """Response body for POST /chat."""

    # Generated assistant reply text.
    reply: str
    # When the reply was produced — presumably time.time(); the /chat
    # handler body is not visible in this chunk, so confirm.
    timestamp: float
52
 
 
53
class FeedbackResponse(BaseModel):
    """Response body for POST /feedback."""

    # Outcome flag for the feedback submission.
    status: str
    # Human-readable description, e.g. "Feedback recorded successfully".
    message: str
56
 
 
57
class StatsResponse(BaseModel):
    """Response body for GET /stats, built from FeedbackManager.get_stats()."""

    # Total interactions recorded in the feedback file.
    total_interactions: int
    # Records carrying a user correction.
    corrections: int
    # Records accepted without correction.
    accepted: int
    # Correction rate as computed by get_stats() (its body is not fully
    # visible in this chunk — confirm whether it is a fraction or percent).
    correction_rate: float
62
 
 
63
class CorrectionCountResponse(BaseModel):
    """Response body for GET /correction-count."""

    # Number of correction records found in the feedback file.
    corrections: int
    # Total feedback records scanned.
    total: int
    # True once corrections >= 20 (threshold hard-coded in the endpoint).
    ready_to_train: bool
67
 
 
68
class DownloadFeedbackResponse(BaseModel):
    """Response body for GET /download-feedback."""

    # Raw text of the feedback JSONL file ("" when the file is absent).
    content: str
    # Number of non-empty lines in content (0 when absent or empty).
    count: int
71
 
72
+ # --- MODEL MANAGER ---
73
 
74
  class ModelManager:
75
  """Singleton model manager to load model once and reuse."""
 
117
  if self._tokenizer.pad_token is None:
118
  self._tokenizer.pad_token = self._tokenizer.eos_token
119
 
120
+ # GPU check for 4-bit loading
121
  if use_4bit and self._device == "cuda":
122
  print("🚀 GPU detected: Loading in 4-bit mode")
123
  try:
 
150
  model_name,
151
  device_map=self._device,
152
  trust_remote_code=True,
 
153
  torch_dtype=torch.float32 if self._device == "cpu" else torch.float16
154
  )
155
 
156
+ if adapter_path and isinstance(adapter_path, str) and adapter_path.strip():
157
  print(f"Loading LoRA adapter: {adapter_path}")
158
  try:
159
  self._model = PeftModel.from_pretrained(
 
222
  skip_special_tokens=True
223
  ).strip()
224
 
 
225
  if "assistant" in reply.lower() and len(reply.split("assistant")) > 1:
226
  reply = reply.split("assistant")[-1].strip()
227
 
228
  return reply
229
 
230
+ # --- FEEDBACK MANAGER ---
231
 
232
  class FeedbackManager:
233
  """Manages feedback storage and statistics."""
234
  def __init__(self, feedback_file: str = "data/feedback.jsonl"):
235
  self.feedback_file = Path(feedback_file)
 
236
  self.feedback_file.parent.mkdir(parents=True, exist_ok=True)
237
 
238
  def save_interaction(
 
292
  "correction_rate": correction_rate
293
  }
294
 
 
295
  model_manager = ModelManager()
296
  feedback_manager = FeedbackManager(feedback_file="data/feedback.jsonl")
297
 
298
+ # --- APP EVENTS AND ENDPOINTS ---
299
 
300
  @app.on_event("startup")
301
  async def startup_event():
 
304
 
305
  model_manager.initialize(
306
  # 1. The Base Model (The heavy lifter)
 
307
  model_name="meta-llama/Llama-3.2-3B-Instruct",
308
 
309
+ # 2. Adapter (The personalization) - YOUR SPECIFIC REPO
310
+ adapter_path="pierreramez/Llama-3.2-3B-Instruct-bnb-4bit_finetuned",
311
 
312
+ # 3. CPU Optimization (Must be False for free tier)
 
313
  use_4bit=False
314
  )
315
 
316
  print("Ready to serve!")
317
 
 
318
  @app.get("/")
319
  async def root():
320
  """Root endpoint"""
 
335
  }
336
  }
337
 
 
338
  @app.get("/health")
339
  async def health_check():
340
  """Health check endpoint"""
 
345
  "device": str(model_manager._device)
346
  }
347
 
 
348
  @app.post("/chat", response_model=ChatResponse)
349
  async def chat(request: ChatRequest):
350
  """Generate chatbot response."""
 
372
  print(f"Error during chat: {e}")
373
  raise HTTPException(status_code=500, detail=str(e))
374
 
 
375
  @app.post("/feedback", response_model=FeedbackResponse)
376
  async def submit_feedback(request: FeedbackRequest):
377
  """Submit correction for a model response."""
378
  try:
 
379
  if feedback_manager.feedback_file.exists():
380
  with open(feedback_manager.feedback_file, "r", encoding="utf-8") as f:
381
  lines = f.readlines()
 
383
  lines = []
384
 
385
  found = False
 
386
  for i in range(len(lines) - 1, -1, -1):
387
  try:
388
  record = json.loads(lines[i])
 
409
  message="Feedback recorded successfully"
410
  )
411
  else:
 
412
  feedback_manager.save_interaction(
413
  user_input=request.user_input,
414
  model_reply=request.model_reply,
 
424
  except Exception as e:
425
  raise HTTPException(status_code=500, detail=str(e))
426
 
 
427
@app.get("/stats", response_model=StatsResponse)
async def get_stats():
    """Return aggregate feedback statistics as a StatsResponse."""
    return StatsResponse(**feedback_manager.get_stats())
432
 
 
433
  @app.get("/correction-count", response_model=CorrectionCountResponse)
434
  async def get_correction_count():
435
+ """Get count of corrections."""
436
  if not feedback_manager.feedback_file.exists():
437
+ return CorrectionCountResponse(corrections=0, total=0, ready_to_train=False)
 
 
 
 
438
 
439
  total = 0
440
  corrections = 0
 
441
  with open(feedback_manager.feedback_file, "r", encoding="utf-8") as f:
442
  for line in f:
443
  try:
 
447
  corrections += 1
448
  except:
449
  pass
 
450
  return CorrectionCountResponse(
451
  corrections=corrections,
452
  total=total,
453
  ready_to_train=corrections >= 20
454
  )
455
 
 
456
@app.get("/download-feedback", response_model=DownloadFeedbackResponse)
async def download_feedback():
    """Download feedback file."""
    path = feedback_manager.feedback_file
    if not path.exists():
        return DownloadFeedbackResponse(content="", count=0)

    # feedback_file is a pathlib.Path, so read it directly.
    content = path.read_text(encoding="utf-8")
    stripped = content.strip()
    # Line count of the trimmed text; 0 for a blank/empty file.
    count = stripped.count("\n") + 1 if stripped else 0
    return DownloadFeedbackResponse(content=content, count=count)
 
 
 
 
467
 
468
  @app.post("/clear-feedback")
469
  async def clear_feedback():
470
+ """Clear feedback file."""
471
  try:
472
  if feedback_manager.feedback_file.exists():
473
  feedback_manager.feedback_file.unlink()
474
+ return {"status": "success", "message": "Feedback file cleared"}
 
 
 
475
  else:
476
+ return {"status": "success", "message": "Feedback file already empty"}
 
 
 
477
  except Exception as e:
478
  raise HTTPException(status_code=500, detail=str(e))
479
 
 
480
@app.post("/reload-adapter")
async def reload_adapter(request: ReloadAdapterRequest):
    """Hot reload model."""
    # NOTE(review): this reloads onto the Llama-3.2-1B base while startup
    # loads the 3B base — confirm the submitted adapter targets 1B.
    try:
        model_manager.initialize(
            model_name="meta-llama/Llama-3.2-1B-Instruct",
            adapter_path=request.adapter_path,
            use_4bit=False,
        )
        return {
            "status": "success",
            "adapter": request.adapter_path,
            "message": "Adapter reloaded successfully",
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to reload adapter: {str(e)}")
 
 
 
 
492
 
493
  if __name__ == "__main__":
494
  import uvicorn