Spaces:

Trigger82
/

Bot

Sleeping

App Files Files Community

Trigger82 committed on May 30, 2025

Commit

dbee570

verified ·

1 Parent(s): e650bc4

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -60

app.py CHANGED Viewed

@@ -1,64 +1,114 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
 if __name__ == "__main__":
-    demo.launch()

+from fastapi import FastAPI, Request, HTTPException
+from fastapi.responses import JSONResponse
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+import os
+import logging
+# Configure logging: INFO level on the root logger, plus a module-level logger
+# used by the rest of this file.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Hugging Face Spaces path setup: SPACE_ID is read from the environment; when
+# it is unset or empty, BASE_PATH is "" and the app is served without a prefix.
+HF_SPACE = os.getenv("SPACE_ID", "")
+BASE_PATH = f"/spaces/{HF_SPACE}" if HF_SPACE else ""
+# FastAPI initialization
+app = FastAPI(
+    title="Phi-2 Chat API",
+    description="Chatbot API using microsoft/phi-2, CPU-optimized",
+    version="1.0",
+    # NOTE(review): BASE_PATH is applied both as root_path and as a prefix of
+    # docs_url -- confirm the docs page resolves correctly behind the proxy.
+    root_path=BASE_PATH,
+    docs_url="/docs" if not BASE_PATH else f"{BASE_PATH}/docs",
+    # ReDoc page is disabled.
+    redoc_url=None
 )
+# Load model and tokenizer once, at import time. Any failure is logged and
+# re-raised as RuntimeError (chained to the original exception), so the module
+# cannot be imported without a working model.
+try:
+    logger.info("Loading Phi-2 tokenizer and model...")
+    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
+    model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2")
+    logger.info("Model loaded successfully!")
+except Exception as e:
+    logger.error(f"Model loading failed: {str(e)}")
+    raise RuntimeError("Model initialization failed") from e
+# In-memory chat history: maps user_id -> list of {"q": ..., "a": ...} dicts.
+# It is a plain module-level dict, so it is not persisted anywhere.
+chat_history = {}
+# System prompt to guide tone; it is prepended to every prompt sent to the model.
+SYSTEM_PROMPT = (
+    "You are a helpful, chill, clever, and fun AI assistant called 𝕴 𝖆𝖒 𝖍𝖎𝖒. "
+    "Talk like a smooth, witty friend. Be friendly and humanlike.\n"
+)
+@app.get("/", include_in_schema=False)
+async def root():
+    """Return a short liveness banner that points callers at the /ai endpoint."""
+    banner = "🟢 Phi-2 API is live. Use /ai?query=Hello&user_id=yourname"
+    return {"message": banner}
+# Number of past exchanges replayed into the prompt and retained per user.
+HISTORY_TURNS = 3
+@app.get("/ai")
+async def chat(request: Request):
+    """Generate a reply to the `query` parameter for the caller's `user_id`.
+
+    Query parameters:
+        query: the user's message; required, at most 200 characters after
+            stripping surrounding whitespace.
+        user_id: key under which conversation history is kept; defaults to
+            "default".
+
+    Returns:
+        {"reply": <generated text>}
+
+    Raises:
+        HTTPException(400): `query` is missing/blank or longer than 200 chars.
+        HTTPException(500): prompt building, generation or decoding failed.
+    """
+    user_input = request.query_params.get("query", "").strip()
+    user_id = request.query_params.get("user_id", "default").strip()
+    # Validate before entering the try block: inside it, the broad
+    # `except Exception` would catch these HTTPExceptions and turn the
+    # intended 400 responses into 500s.
+    if not user_input:
+        raise HTTPException(status_code=400, detail="Missing 'query' parameter.")
+    if len(user_input) > 200:
+        raise HTTPException(status_code=400, detail="Query too long (max 200 characters)")
+    try:
+        # Rebuild the recent conversation as "User: ... / AI: ..." lines.
+        user_history = chat_history.get(user_id, [])
+        history_prompt = "".join(
+            f"User: {entry['q']}\nAI: {entry['a']}\n"
+            for entry in user_history[-HISTORY_TURNS:]
+        )
+        full_prompt = SYSTEM_PROMPT + history_prompt + f"User: {user_input}\nAI:"
+        # Tokenize and generate. The attention mask is passed explicitly
+        # because pad_token_id is set to the EOS token, so padding cannot be
+        # inferred from the ids alone.
+        encoded = tokenizer(full_prompt, return_tensors="pt")
+        input_ids = encoded.input_ids
+        output_ids = model.generate(
+            input_ids,
+            attention_mask=encoded.attention_mask,
+            max_new_tokens=100,
+            temperature=0.8,
+            top_p=0.95,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+        # Decode only the newly generated tokens, not the echoed prompt.
+        response = tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()
+        # Store updated history, keeping only the exchanges that can still be
+        # replayed so per-user memory does not grow without bound.
+        user_history.append({"q": user_input, "a": response})
+        chat_history[user_id] = user_history[-HISTORY_TURNS:]
+        return {"reply": response}
+    except Exception as e:
+        # logger.exception records the traceback along with the message.
+        logger.exception("Error: %s", e)
+        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}") from e
+@app.get("/health")
+async def health_check():
+    """Report service status, the model name, tracked user count and Space id."""
+    tracked_users = len(chat_history)
+    return dict(
+        status="healthy",
+        model="microsoft/phi-2",
+        users=tracked_users,
+        space_id=HF_SPACE,
+    )
+@app.get("/reset")
+async def reset_history(user_id: str = "default"):
+    """Drop any stored conversation for `user_id`; succeeds even if none exists."""
+    # pop() with a default removes the entry when present and is a no-op otherwise.
+    chat_history.pop(user_id, None)
+    confirmation = f"History cleared for user {user_id}"
+    return {"status": "success", "message": confirmation}
 if __name__ == "__main__":
+    # Script entry point: uvicorn is imported here, so it is only required
+    # when this file is run directly rather than imported.
+    import uvicorn
+    # Serve the app on all interfaces, port 7860, with a 30-second keep-alive
+    # timeout. NOTE(review): 7860 is presumably the port the hosting platform
+    # expects -- confirm against the Space configuration.
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=7860,
+        log_level="info",
+        timeout_keep_alive=30
+    )