Update app.py
app.py CHANGED
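The first hunk opens at line 7 with "import os" as context, so the file's import block is outside the diff. For the added code to run, app.py would need imports along these lines (a reconstruction inferred from the names the new code uses, not part of this commit):

# Presumed import block -- inferred from names used below, not shown in the diff.
import os
import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM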
@@ -7,12 +7,38 @@ import os
 
 app = FastAPI(title="TinyLlama Fitness Bot")
 
-
+print("Loading model and tokenizer...")
+
+# Initialize model and tokenizer globally
+try:
+    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True
+    )
+    print("Model and tokenizer loaded successfully!")
+    MODEL_LOADED = True
+except Exception as e:
+    print(f"Error loading model: {e}")
+    MODEL_LOADED = False
+
+class Query(BaseModel):
+    prompt: str
+    max_length: int = 256
+    temperature: float = 0.7
+
+class Response(BaseModel):
+    response: str
+
 @app.get("/")
 def read_root():
-    return {
+    return {
+        "status": "API is running!",
+        "model_loaded": MODEL_LOADED
+    }
 
-# Test route to check environment
 @app.get("/debug")
 def debug_info():
     return {
@@ -20,20 +46,15 @@ def debug_info():
             {"path": route.path, "name": route.name}
             for route in app.routes
         ],
-        "model_loaded":
-        "
+        "model_loaded": MODEL_LOADED,
+        "model_name": model_name if MODEL_LOADED else None,
     }
 
-
-    prompt: str
-    max_length: int = 256
-    temperature: float = 0.7
-
-class Response(BaseModel):
-    response: str
-
-@app.post("/chat", response_model=Response)
+@app.post("/chat")
 async def chat(query: Query):
+    if not MODEL_LOADED:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+
     try:
         system_prompt = """You are a knowledgeable fitness and nutrition assistant."""
         formatted_prompt = f"<|system|>{system_prompt}</s><|user|>{query.prompt}</s><|assistant|>"
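Both sides of the diff cut off here, inside chat()'s try block, right after formatted_prompt is built, so the generation step itself is not visible. Given the tokenizer and model globals and the Query fields added above, the handler presumably continues roughly like this (a sketch under those assumptions; the variable names are illustrative, not the file's actual code):

        # Hypothetical continuation of chat(); the diff ends before this point.
        inputs = tokenizer(formatted_prompt, return_tensors="pt")
        outputs = model.generate(
            **inputs,
            max_new_tokens=query.max_length,  # treating max_length as the new-token budget
            temperature=query.temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # TinyLlama's chat template marks the reply with <|assistant|>;
        # keep only the text after the last marker.
        answer = text.split("<|assistant|>")[-1].strip()
        return Response(response=answer)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

Loading the model once at import time and gating requests on MODEL_LOADED keeps per-request latency down, and lets / and /debug report a failed load instead of the whole app crashing on startup.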
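A side note on the hand-built f-string: recent transformers releases can render the model's own chat template from the tokenizer, which avoids drift if the prompt format or model changes (an alternative approach, not what this commit does):

# Alternative to the manual f-string: let the tokenizer render its chat template.
messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": query.prompt},
]
formatted_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)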
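Once the Space is up, the three routes can be smoke-tested from any HTTP client; for example (the base URL is a placeholder for the Space's actual address; 7860 is the usual Spaces default port):

# Quick smoke test; base URL is a placeholder.
import requests

base = "http://localhost:7860"

print(requests.get(f"{base}/").json())       # expect {"status": "API is running!", "model_loaded": true}
print(requests.get(f"{base}/debug").json())  # route list plus model info

r = requests.post(f"{base}/chat", json={
    "prompt": "How much protein should I eat after a workout?",
    "max_length": 128,   # Query defaults: max_length=256, temperature=0.7
    "temperature": 0.7,
})
print(r.json())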