Update app.py
app.py CHANGED
@@ -1,17 +1,30 @@
 import os
+import logging
+import sys
 import torch
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-from typing import List, Dict, Optional
+from typing import List, Dict, Optional, Any
 from datasets import load_dataset
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import uvicorn
+import time
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler(sys.stdout)]
+)
+logger = logging.getLogger(__name__)
 
 app = FastAPI()
 
 # Global variables
 model = None
 tokenizer = None
+generator = None
 dataset = None
 
 # Pydantic models for request/response
@@ -26,35 +39,67 @@ class ChatRequest(BaseModel):
 class ChatResponse(BaseModel):
     response: str
 
+# Use a much smaller model suitable for Hugging Face Spaces
+MODEL_ID = "distilgpt2"  # Using a very small model for testing
+
+# Error handler
+@app.exception_handler(Exception)
+async def generic_exception_handler(request: Request, exc: Exception):
+    logger.error(f"Unhandled exception: {str(exc)}", exc_info=True)
+    return JSONResponse(
+        status_code=500,
+        content={"detail": f"Internal server error: {str(exc)}"}
+    )
+
 # Load model on startup
 @app.on_event("startup")
 async def startup_event():
-    global model, tokenizer, dataset
+    global model, tokenizer, generator, dataset
+
     try:
-
-
-
+        logger.info(f"Loading model: {MODEL_ID}")
+        start_time = time.time()
+
+        # Load the tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+        logger.info(f"Tokenizer loaded in {time.time() - start_time:.2f} seconds")
 
+        # Load the model with optimizations
         model = AutoModelForCausalLM.from_pretrained(
-
-            torch_dtype=torch.float16,
-
+            MODEL_ID,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            low_cpu_mem_usage=True,
+            device_map="auto" if torch.cuda.is_available() else None
        )
+        logger.info(f"Model loaded in {time.time() - start_time:.2f} seconds")
+
+        # Create a text generation pipeline
+        device = 0 if torch.cuda.is_available() else -1
+        generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
+        logger.info(f"Generator pipeline created in {time.time() - start_time:.2f} seconds")
 
-        #
-
-
+        # Try to load dataset
+        try:
+            logger.info("Loading dataset: lahiruchamika27/tia")
+            dataset = load_dataset("lahiruchamika27/tia")
+            logger.info("Dataset loaded successfully")
+        except Exception as e:
+            logger.error(f"Error loading dataset: {str(e)}")
+            logger.info("Continuing without dataset")
+
+        logger.info(f"Startup completed in {time.time() - start_time:.2f} seconds")
     except Exception as e:
-
-
+        logger.error(f"Error during startup: {str(e)}", exc_info=True)
+        logger.info("API will still be available but might not function correctly")
 
 @app.post("/api/chat", response_model=ChatResponse)
 async def chat(request: ChatRequest):
-
+    logger.info(f"Received chat request: {request.message[:50]}...")
 
-    #
-    if
-
+    # Check if model is loaded
+    if generator is None:
+        logger.error("Text generator not initialized")
+        raise HTTPException(status_code=500, detail="Text generation pipeline not initialized")
 
     try:
         # Format conversation
@@ -70,30 +115,37 @@ async def chat(request: ChatRequest):
         else:
             full_prompt = f"User: {request.message}\nAssistant:"
 
-
-        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+        logger.info(f"Generated prompt: {full_prompt[:100]}...")
 
-
-
-
-
-
-
-
-
+        # Generate response
+        start_time = time.time()
+        outputs = generator(
+            full_prompt,
+            max_new_tokens=100,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True
+        )
+        logger.info(f"Text generated in {time.time() - start_time:.2f} seconds")
 
-        #
-
+        # Extract response
+        generated_text = outputs[0]['generated_text']
+        # Extract only the assistant's response
+        response_text = generated_text[len(full_prompt):].strip()
 
-        return
+        # If empty or just whitespace, return a fallback message
+        if not response_text or response_text.isspace():
+            response_text = "I'm sorry, I'm having trouble generating a response right now."
+
+        logger.info(f"Final response: {response_text[:50]}...")
+        return ChatResponse(response=response_text)
 
     except Exception as e:
-
+        logger.error(f"Error generating response: {str(e)}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}")
 
 @app.get("/api/examples")
 async def get_examples(count: int = 5, split: str = "train"):
-    global dataset
-
     if dataset is None:
         raise HTTPException(status_code=500, detail="Dataset not loaded")
 
@@ -104,13 +156,23 @@ async def get_examples(count: int = 5, split: str = "train"):
             return {"examples": examples}
         else:
             raise HTTPException(status_code=400, detail=f"Split '{split}' not found in dataset")
-
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
 @app.get("/health")
 async def health_check():
-
+    system_info = {
+        "status": "ok",
+        "model_loaded": model is not None,
+        "tokenizer_loaded": tokenizer is not None,
+        "generator_loaded": generator is not None,
+        "dataset_loaded": dataset is not None,
+        "model_name": MODEL_ID,
+        "torch_device": "cuda" if torch.cuda.is_available() else "cpu",
+        "cuda_available": torch.cuda.is_available(),
+        "cuda_device_count": torch.cuda.device_count() if torch.cuda.is_available() else 0
+    }
+    return system_info
 
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 7860))
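Once this commit is deployed, the new routes can be smoke-tested from any client. A minimal sketch, assuming the default port 7860, a placeholder base URL (the real Space URL differs), and using only the message field of ChatRequest, since the diff does not show its other fields:

import requests

BASE_URL = "http://localhost:7860"  # placeholder; substitute the Space's URL

# /health now reports which components loaded
health = requests.get(f"{BASE_URL}/health", timeout=30).json()
print(health["status"], health["model_name"], health["generator_loaded"])

# /api/chat takes a ChatRequest body and returns a ChatResponse
reply = requests.post(
    f"{BASE_URL}/api/chat",
    json={"message": "Hello! What can you do?"},
    timeout=120,  # the first generation after a cold start can be slow on CPU
)
reply.raise_for_status()
print(reply.json()["response"])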
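One caveat with the generation settings above: GPT-2-family tokenizers such as distilgpt2's define no padding token, so each pipeline call logs a "Setting pad_token_id to eos_token_id" notice. A standalone sketch of the workaround, passing pad_token_id explicitly; return_full_text=False additionally makes the pipeline return only the continuation, which would spare the manual len(full_prompt) slicing the chat handler does:

from transformers import pipeline

gen = pipeline("text-generation", model="distilgpt2")

# pad_token_id silences the per-call notice; return_full_text=False
# yields just the generated continuation, not prompt + continuation.
out = gen(
    "User: Hello!\nAssistant:",
    max_new_tokens=20,
    do_sample=True,
    pad_token_id=gen.tokenizer.eos_token_id,
    return_full_text=False,
)
print(out[0]["generated_text"])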
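Note also that @app.on_event("startup") is deprecated in recent FastAPI releases in favor of lifespan handlers. If the Space's FastAPI version supports it, the same loading logic could migrate to something like the following sketch, with the body elided:

from contextlib import asynccontextmanager
from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    # model/tokenizer/generator/dataset loading from startup_event() goes here
    yield
    # optional teardown after shutdown (e.g. releasing the model)

app = FastAPI(lifespan=lifespan)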