Update app.py
app.py CHANGED
@@ -1,37 +1,43 @@
 from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import JSONResponse, HTMLResponse
-from transformers import …
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import os
 import logging
 import uvicorn
 
-# …
+# Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# …
+# Initialize FastAPI
+app = FastAPI(
+    title="PHI Chatbot API",
+    description="Chatbot API using Microsoft's Phi-2 model",
+    version="1.0",
+)
+
+# Get base path from environment (for Hugging Face Spaces)
 BASE_PATH = os.getenv("SPACE_APP_PATH", "").rstrip("/")
 logger.info(f"Using base path: '{BASE_PATH}'")
 
-# …
-app = FastAPI(title="Trigger AI", description="Lightning fast chatbot", version="1.0")
-
-# Load lightweight fast model (phi-1.5)
+# Load model and tokenizer
 try:
     logger.info("Loading tokenizer and model...")
-    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-…
-    model = AutoModelForCausalLM.from_pretrained("microsoft/phi-…
-
+    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
+    model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
+    model.eval()
+    logger.info("Model loaded successfully!")
 except Exception as e:
-    logger.error(f"Model …
-    raise RuntimeError("Model failed …
+    logger.error(f"Model loading failed: {str(e)}")
+    raise RuntimeError("Model initialization failed") from e
 
-# In-memory chat memory
-chat_memory = {}
+# In-memory chat memory
+chat_history = {}
 
+# Middleware for base path
 @app.middleware("http")
-async def strip_base_path(request: Request, call_next):
+async def add_base_path(request: Request, call_next):
     path = request.scope["path"]
     if BASE_PATH and path.startswith(BASE_PATH):
         request.scope["path"] = path[len(BASE_PATH):]
@@ -40,74 +46,93 @@ async def strip_base_path(request: Request, call_next):
 @app.get("/")
 async def root():
     return {
-        "message": "…
-        "…
+        "message": "🟢 PHI API is running",
+        "endpoints": {
+            "chat": f"{BASE_PATH}/ai?query=Hello&user_id=yourname",
+            "health": f"{BASE_PATH}/health",
+            "reset": f"{BASE_PATH}/reset?user_id=yourname",
+            "test": f"{BASE_PATH}/test",
+            "docs": f"{BASE_PATH}/docs"
+        }
     }
 
 @app.get("/ai")
-async def …
-    query = request.query_params.get("query", "").strip()
-    user_id = request.query_params.get("user_id", "").strip()
-
-    if not query or not user_id:
-        raise HTTPException(status_code=400, detail="Missing 'query' or 'user_id'")
-
+async def chat(request: Request):
     try:
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
+        user_input = request.query_params.get("query", "").strip()
+        user_id = request.query_params.get("user_id", "default").strip()
+
+        if not user_input:
+            raise HTTPException(status_code=400, detail="Missing 'query'")
+        if len(user_input) > 200:
+            raise HTTPException(status_code=400, detail="Query too long (max 200 characters)")
+
+        # Prompt style: phi models work best with natural instructions
+        memory = chat_history.get(user_id, [])
+        prompt = "You are a friendly, funny AI assistant called Trigger.\n\n"
+        for q, a in memory:
+            prompt += f"User: {q}\nTrigger: {a}\n"
+        prompt += f"User: {user_input}\nTrigger:"
+
+        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
         output = model.generate(
-            …
-            max_new_tokens=…
-            …
-            top_k=40,
-            top_p=0.9,
+            input_ids,
+            max_new_tokens=128,
+            pad_token_id=tokenizer.eos_token_id,
             temperature=0.8,
-            …
+            top_k=50,
+            top_p=0.95,
         )
+        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+        response = generated_text[len(prompt):].strip().split("\n")[0]
 
-        # …
-        …
-        …
-        # Save memory
-        chat_memory[user_id] = [full_input, output]
+        # Save history (limit to last 5 exchanges)
+        memory.append((user_input, response))
+        chat_history[user_id] = memory[-5:]
 
         return {"reply": response}
 
+    except torch.cuda.OutOfMemoryError:
+        logger.error("CUDA out of memory error")
+        if user_id in chat_history:
+            del chat_history[user_id]
+        raise HTTPException(status_code=500, detail="Memory error. Try again.")
     except Exception as e:
-        logger.error(f"…
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.get("/reset")
-async def reset(user_id: str = "default"):
-    if user_id in chat_memory:
-        del chat_memory[user_id]
-    return {"status": "cleared", "user_id": user_id}
+        logger.error(f"Processing error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
 
 @app.get("/health")
 async def health():
     return {
-        "status": "…
-        "…
-        "…
+        "status": "healthy",
+        "model": "microsoft/phi-2",
+        "users": len(chat_history),
         "base_path": BASE_PATH
     }
 
+@app.get("/reset")
+async def reset_history(user_id: str = "default"):
+    if user_id in chat_history:
+        del chat_history[user_id]
+    return {"status": "success", "message": f"History cleared for user {user_id}"}
+
 @app.get("/test", response_class=HTMLResponse)
-async def …
+async def test_page():
     return f"""
     <html>
     <body>
-        <…
-        <…
+        <h1>PHI Chatbot Test</h1>
+        <p>Base path: {BASE_PATH}</p>
+        <ul>
+            <li><a href="{BASE_PATH}/">Root endpoint</a></li>
+            <li><a href="{BASE_PATH}/ai?query=Hello&user_id=test">Chat endpoint</a></li>
+            <li><a href="{BASE_PATH}/health">Health check</a></li>
+            <li><a href="{BASE_PATH}/docs">API Docs</a></li>
+        </ul>
    </body>
     </html>
     """
 
+# Run locally
 if __name__ == "__main__":
-    uvicorn.run("app:app", host="0.0.0.0", port=7860)
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, log_level="info", reload=True)
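A note on the new generate() call: in transformers, temperature, top_k, and top_p only take effect when do_sample=True is also passed; as committed, generation falls back to greedy decoding and the sampling settings are silently ignored (recent transformers versions log a warning about this). A minimal corrected sketch, reusing the tokenizer, model, and prompt names defined in app.py above:

    # Sketch only: tokenizer, model, and prompt are the objects defined in app.py above.
    inputs = tokenizer(prompt, return_tensors="pt")
    output = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,  # avoids the missing-attention-mask warning
        max_new_tokens=128,
        do_sample=True,  # required for temperature/top_k/top_p to apply
        temperature=0.8,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )

Relatedly, the 400-level HTTPExceptions raised inside the try block are caught by the generic except Exception handler and re-raised as 500s; an `except HTTPException: raise` branch above the generic handler would preserve the intended status codes.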
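The base-path middleware is easy to sanity-check in isolation. Below is a self-contained sketch with a stub app, so the Phi-2 weights never have to load; it assumes fastapi and httpx are installed, and it spells out the `return await call_next(request)` tail that sits in the diff's unshown context lines:

    from fastapi import FastAPI, Request
    from fastapi.testclient import TestClient

    BASE_PATH = "/myspace"  # stand-in for the SPACE_APP_PATH env var

    app = FastAPI()

    @app.middleware("http")
    async def add_base_path(request: Request, call_next):
        # Same stripping logic as app.py: drop the Space prefix before routing.
        path = request.scope["path"]
        if BASE_PATH and path.startswith(BASE_PATH):
            request.scope["path"] = path[len(BASE_PATH):]
        return await call_next(request)

    @app.get("/health")
    async def health():
        return {"status": "healthy"}

    client = TestClient(app)
    assert client.get("/myspace/health").json() == {"status": "healthy"}  # prefix stripped
    assert client.get("/health").json() == {"status": "healthy"}          # bare path still works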
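Finally, a quick smoke test against a running instance. This assumes the server was started locally with `python app.py` and that the requests package is available; for a deployed Space, swap BASE for the Space URL plus its SPACE_APP_PATH prefix:

    import requests

    BASE = "http://localhost:7860"  # hypothetical local address; adjust for your deployment

    print(requests.get(f"{BASE}/health").json())
    # e.g. {"status": "healthy", "model": "microsoft/phi-2", "users": 0, "base_path": ""}

    reply = requests.get(f"{BASE}/ai", params={"query": "Hello", "user_id": "test"}).json()
    print(reply["reply"])

    requests.get(f"{BASE}/reset", params={"user_id": "test"})  # clear the test history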