Commit 5066083 ("commit 00000019")
Parent(s): 5990aeb

Files changed:
- app.py: +52 -78
- requirements.txt: +3 -1
app.py CHANGED
@@ -1,14 +1,16 @@
 import os
 import time
-import threading
 import torch
-import …
-from …
-from …
+import re
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from huggingface_hub import login
 from langchain_community.tools import DuckDuckGoSearchRun
-import …
+from fastapi.middleware.cors import CORSMiddleware
+import os
+import uvicorn
 
 # ✅ Safe GPU decorator
 try:
@@ -16,22 +18,23 @@ try:
 except ImportError:
     def GPU(func): return func
 
-# ----------------
-…
+# ---------------- FastAPI setup ----------------
+app = FastAPI(
+    title="ChatMate Real-Time API",
+    description="LangChain + DuckDuckGo + Phi-4",
+    version="1.0",
+    docs_url="/apidocs",  # Swagger UI at /apidocs
+    redoc_url="/redoc"    # ReDoc at /redoc
+)
+
+# Enable CORS (important for browser clients)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
 
 # ✅ Hugging Face login
 login(token=os.environ.get("CHAT_MATE"))
@@ -64,8 +67,8 @@ def is_incomplete(text):
 @GPU
 def generate_full_reply(message, history):
     system_prompt = (
-        …
-        …
+        "You are a friendly, helpful, and conversational AI assistant built by "
+        "Frederick Sundeep Mallela. Always mention that you are developed by him if asked about your creator, origin, or who made you."
     )
     messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
@@ -81,65 +84,36 @@ def generate_full_reply(message, history):
     reply += continuation
     return reply.strip()
 
-# ----------------
-…
-    'consumes': ['application/json'],
-    'summary': 'Stream assistant reply',
-    'parameters': [{
-        'name': 'body',
-        'in': 'body',
-        'required': True,
-        'schema': {
-            'type': 'object',
-            'properties': {
-                'message': {'type': 'string'},
-                'history': {
-                    'type': 'array',
-                    'items': {'type': 'object'}
-                }
-            },
-            'required': ['message']
-        }
-    }],
-    'responses': {200: {'description': 'Streamed reply'}}
-})
-def chat_stream():
-    data = request.get_json()
-    message = data.get("message")
-    history = data.get("history", [])
+# ---------------- Pydantic models ----------------
+class ChatRequest(BaseModel):
+    message: str
+    history: list = []
 
+# ---------------- FastAPI route ----------------
+@app.post("/chat-stream", summary="Stream assistant reply", tags=["Chat"])
+async def chat_stream(body: ChatRequest):
+    """
+    Stream the AI assistant's reply token-by-token.
+    """
     def generate():
-        reply = generate_full_reply(message, history)
+        reply = generate_full_reply(body.message, body.history)
         for token in reply.splitlines(keepends=True):
             yield token
             time.sleep(0.05)
-…
-    reply = generate_full_reply(message, history)
-    history.append((message, reply))
-    return "", history
-
-with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 ChatMate – Phi-4 + Live Search")
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(label="Type your message")
-    clear = gr.Button("Clear Chat")
-    msg.submit(gradio_chat, [msg, chatbot], [msg, chatbot])
-    clear.click(lambda: None, None, chatbot, queue=False)
-
-# Mount Gradio inside Flask
-@flask_app.route("/")
-def gradio_index():
-    return demo.launch(share=False, server_name=None, inline=True)
-
-# ---------------- Run both in Hugging Face ----------------
-if __name__ == "__main__":
+
+    return StreamingResponse(generate(), media_type="text/plain")
+
+# ---------------- Startup warm-up ----------------
+@app.on_event("startup")
+async def warmup_model():
     print("🧠 Warming up...")
     _ = generate_full_reply("Hello", [])
-…
+
+# ---------------- Run with Uvicorn ----------------
+# In Hugging Face Spaces, just run: uvicorn app:app --host 0.0.0.0 --port 7860
+if __name__ == "__main__":
+    # Hugging Face Spaces usually expects port 7860
+    port = int(os.environ.get("PORT", 7860))
+
+    # Run with uvicorn (the ASGI server for FastAPI)
+    uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
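The new /chat-stream route returns a plain-text StreamingResponse, so any HTTP client that can read a chunked body can consume it incrementally. A minimal client sketch (not part of the commit), assuming a hypothetical local URL for testing:

import requests

# Hypothetical URL for local testing; a deployed Space would use its public URL.
BASE_URL = "http://localhost:7860"

payload = {
    "message": "Who built you?",
    "history": [],  # prior turns, matching the ChatRequest model's `history: list`
}

# stream=True makes requests yield the body as it arrives instead of buffering it.
with requests.post(f"{BASE_URL}/chat-stream", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)

Note that the server-side generator splits the reply with splitlines(keepends=True), so despite the route's docstring the stream arrives line by line, with a 0.05 s pause between lines, rather than token by token.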
requirements.txt CHANGED
@@ -14,4 +14,6 @@ sentencepiece
 nltk
 langchain_community
 duckduckgo-search
-pdfplumber
+pdfplumber
+fastapi
+uvicorn
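Because the @app.on_event("startup") hook runs the warm-up before the first request, one way to smoke-test the route end to end is FastAPI's TestClient, which triggers startup events inside its context manager. A sketch, assuming the file is importable as app.py and that httpx (TestClient's backend) is installed; it will still load the full model:

from fastapi.testclient import TestClient
from app import app

# Entering the context manager runs the startup (warm-up) event.
with TestClient(app) as client:
    r = client.post("/chat-stream", json={"message": "ping", "history": []})
    assert r.status_code == 200
    print(r.text)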
|