FrederickSundeep committed on
Commit
5066083
·
1 Parent(s): 5990aeb

commit 00000019

Browse files
Files changed (2) hide show
  1. app.py +52 -78
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,14 +1,16 @@
1
  import os
2
  import time
3
- import threading
4
  import torch
5
- import gradio as gr
6
- from flask import Flask, request, Response
7
- from flasgger import Swagger, swag_from
 
8
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
  from huggingface_hub import login
10
  from langchain_community.tools import DuckDuckGoSearchRun
11
- import re
 
 
12
 
13
  # ✅ Safe GPU decorator
14
  try:
@@ -16,22 +18,23 @@ try:
16
  except ImportError:
17
  def GPU(func): return func
18
 
19
- # ---------------- Flask setup ----------------
20
- flask_app = Flask(__name__)
21
- swagger = Swagger(flask_app, template={
22
- "swagger": "2.0",
23
- "info": {
24
- "title": "ChatMate Real-Time API",
25
- "description": "LangChain + DuckDuckGo + Phi-4",
26
- "version": "1.0"
27
- }
28
- }, config={
29
- "headers": [],
30
- "specs": [{"endpoint": 'apispec', "route": '/apispec.json', "rule_filter": lambda rule: True}],
31
- "static_url_path": "/flasgger_static",
32
- "swagger_ui": True,
33
- "specs_route": "/apidocs/"
34
- })
 
35
 
36
  # ✅ Hugging Face login
37
  login(token=os.environ.get("CHAT_MATE"))
@@ -64,8 +67,8 @@ def is_incomplete(text):
64
  @GPU
65
  def generate_full_reply(message, history):
66
  system_prompt = (
67
- "You are a friendly, helpful, and conversational AI assistant built by "
68
- "Frederick Sundeep Mallela. Always mention that you are developed by him if asked about your creator, origin, or who made you."
69
  )
70
  messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
71
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
@@ -81,65 +84,36 @@ def generate_full_reply(message, history):
81
  reply += continuation
82
  return reply.strip()
83
 
84
- # ---------------- Flask API route ----------------
85
- @flask_app.route("/chat-stream", methods=["POST"])
86
- @swag_from({
87
- 'tags': ['Chat'],
88
- 'consumes': ['application/json'],
89
- 'summary': 'Stream assistant reply',
90
- 'parameters': [{
91
- 'name': 'body',
92
- 'in': 'body',
93
- 'required': True,
94
- 'schema': {
95
- 'type': 'object',
96
- 'properties': {
97
- 'message': {'type': 'string'},
98
- 'history': {
99
- 'type': 'array',
100
- 'items': {'type': 'object'}
101
- }
102
- },
103
- 'required': ['message']
104
- }
105
- }],
106
- 'responses': {200: {'description': 'Streamed reply'}}
107
- })
108
- def chat_stream():
109
- data = request.get_json()
110
- message = data.get("message")
111
- history = data.get("history", [])
112
 
 
 
 
 
 
 
113
  def generate():
114
- reply = generate_full_reply(message, history)
115
  for token in reply.splitlines(keepends=True):
116
  yield token
117
  time.sleep(0.05)
118
- return Response(generate(), mimetype='text/plain')
119
-
120
- # ---------------- Gradio UI ----------------
121
- def gradio_chat(message, history=[]):
122
- history = [{"role": "user" if i % 2 == 0 else "assistant", "content": h}
123
- for i, h in enumerate(sum(history, ()))]
124
- reply = generate_full_reply(message, history)
125
- history.append((message, reply))
126
- return "", history
127
-
128
- with gr.Blocks() as demo:
129
- gr.Markdown("## 🤖 ChatMate — Phi-4 + Live Search")
130
- chatbot = gr.Chatbot()
131
- msg = gr.Textbox(label="Type your message")
132
- clear = gr.Button("Clear Chat")
133
- msg.submit(gradio_chat, [msg, chatbot], [msg, chatbot])
134
- clear.click(lambda: None, None, chatbot, queue=False)
135
-
136
- # Mount Gradio inside Flask
137
- @flask_app.route("/")
138
- def gradio_index():
139
- return demo.launch(share=False, server_name=None, inline=True)
140
-
141
- # ---------------- Run both in Hugging Face ----------------
142
- if __name__ == "__main__":
143
  print("🔧 Warming up...")
144
  _ = generate_full_reply("Hello", [])
145
- flask_app.run(host="0.0.0.0", port=7860)
 
 
 
 
 
 
 
 
 
1
  import os
2
  import time
 
3
  import torch
4
+ import re
5
+ from fastapi import FastAPI, Request
6
+ from fastapi.responses import StreamingResponse
7
+ from pydantic import BaseModel
8
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
9
  from huggingface_hub import login
10
  from langchain_community.tools import DuckDuckGoSearchRun
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+ import os
13
+ import uvicorn
14
 
15
  # ✅ Safe GPU decorator
16
  try:
 
18
  except ImportError:
19
  def GPU(func): return func
20
 
21
+ # ---------------- FastAPI setup ----------------
22
+ app = FastAPI(
23
+ title="ChatMate Real-Time API",
24
+ description="LangChain + DuckDuckGo + Phi-4",
25
+ version="1.0",
26
+ docs_url="/apidocs", # Swagger UI at /apidocs
27
+ redoc_url="/redoc" # ReDoc at /redoc
28
+ )
29
+
30
+ # Enable CORS (important for browser clients)
31
+ app.add_middleware(
32
+ CORSMiddleware,
33
+ allow_origins=["*"],
34
+ allow_credentials=True,
35
+ allow_methods=["*"],
36
+ allow_headers=["*"],
37
+ )
38
 
39
  # ✅ Hugging Face login
40
  login(token=os.environ.get("CHAT_MATE"))
 
67
  @GPU
68
  def generate_full_reply(message, history):
69
  system_prompt = (
70
+ "You are a friendly, helpful, and conversational AI assistant built by "
71
+ "Frederick Sundeep Mallela. Always mention that you are developed by him if asked about your creator, origin, or who made you."
72
  )
73
  messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]
74
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
84
  reply += continuation
85
  return reply.strip()
86
 
87
+ # ---------------- Pydantic models ----------------
88
+ class ChatRequest(BaseModel):
89
+ message: str
90
+ history: list = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
+ # ---------------- FastAPI route ----------------
93
+ @app.post("/chat-stream", summary="Stream assistant reply", tags=["Chat"])
94
+ async def chat_stream(body: ChatRequest):
95
+ """
96
+ Stream the AI assistant's reply line by line (the full reply is generated first, then split with splitlines).
97
+ """
98
  def generate():
99
+ reply = generate_full_reply(body.message, body.history)
100
  for token in reply.splitlines(keepends=True):
101
  yield token
102
  time.sleep(0.05)
103
+
104
+ return StreamingResponse(generate(), media_type="text/plain")
105
+
106
+ # ---------------- Startup warm-up ----------------
107
+ @app.on_event("startup")
108
+ async def warmup_model():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  print("🔧 Warming up...")
110
  _ = generate_full_reply("Hello", [])
111
+
112
+ # ---------------- Run with Uvicorn ----------------
113
+ # In Hugging Face Spaces, just run: uvicorn app:app --host 0.0.0.0 --port 7860
114
+ if __name__ == "__main__":
115
+ # Hugging Face Spaces usually expects port 7860
116
+ port = int(os.environ.get("PORT", 7860))
117
+
118
+ # Run the FastAPI (ASGI) app with uvicorn
119
+ uvicorn.run("app:app", host="0.0.0.0", port=port, reload=False)
requirements.txt CHANGED
@@ -14,4 +14,6 @@ sentencepiece
14
  nltk
15
  langchain_community
16
  duckduckgo-search
17
- pdfplumber
 
 
 
14
  nltk
15
  langchain_community
16
  duckduckgo-search
17
+ pdfplumber
18
+ fastapi
19
+ uvicorn