# AIChatMate / app.py
# FrederickSundeep's picture
# Update app.py
# 21d088c verified
import os
import time
from flask import Flask, request, render_template, jsonify, Response
from flasgger import Swagger, swag_from
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from huggingface_hub import login
from langchain_community.tools import DuckDuckGoSearchRun
# ✅ Safe import of GPU decorator
try:
    from spaces import GPU
except ImportError:
    def GPU(func=None, **_options):
        """No-op stand-in used when the ``spaces`` package cannot be imported.

        Supports both the bare decorator form (``@GPU``) and the factory form
        with keyword options (``@GPU(duration=60)``). In either form the
        decorated function is returned unchanged.

        Args:
            func: The function being decorated, or ``None`` when the decorator
                is called with keyword options only.
            **_options: Accepted and ignored so option-carrying call sites
                keep working without the real package.

        Returns:
            ``func`` itself, or a pass-through decorator when ``func`` is
            ``None``.
        """
        if func is None:
            # Factory form: hand back a decorator that leaves its target as-is.
            return lambda wrapped: wrapped
        return func
# Flask + Swagger setup
app = Flask(__name__, static_folder="static", template_folder="templates")

# Top-level metadata for the generated Swagger 2.0 document.
_swagger_template = {
    "swagger": "2.0",
    "info": {
        "title": "ChatMate Real-Time API",
        "description": "LangChain + DuckDuckGo enabled AI chatbot",
        "version": "1.0",
    },
}

# Flasgger settings: a single spec at /apispec.json whose rule filter accepts
# every rule, with the Swagger UI enabled at /apidocs/.
_swagger_config = {
    "headers": [],
    "specs": [
        {
            "endpoint": "apispec",
            "route": "/apispec.json",
            "rule_filter": lambda rule: True,
        },
    ],
    "static_url_path": "/flasgger_static",
    "swagger_ui": True,
    "specs_route": "/apidocs/",
}

swagger = Swagger(app, template=_swagger_template, config=_swagger_config)
# ✅ Hugging Face login (only when a token is provided)
# login() asks for a token interactively when none is passed, which cannot
# work in a headless server process, so the call is skipped when the
# CHAT_MATE environment variable is unset or empty.
hf_token = os.environ.get("CHAT_MATE")
if hf_token:
    login(token=hf_token)

# ✅ Load LLaMA 3.1 Instruct model
model_id = "meta-llama/Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
# Shared text-generation pipeline; each call may produce up to 512 new tokens.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
# ✅ Simple keyword-based check for real-time info
REAL_TIME_KEYWORDS = {"latest", "current", "news", "today", "price", "time", "live", "trending", "update", "happening"}


def should_search(message):
    """Return True when ``message`` contains a real-time keyword as a whole word.

    Matching is case-insensitive and done on whole words, so a keyword that
    is merely embedded in a longer word ("current" inside "concurrent",
    "time" inside "sometimes") does not trigger a search. Inflected forms
    such as "prices" are likewise not matched.

    Args:
        message: The user's message. Anything that is not a string
            (including ``None``) is treated as containing no keywords.

    Returns:
        ``True`` if at least one word of ``message`` is in
        ``REAL_TIME_KEYWORDS``, otherwise ``False``.
    """
    if not isinstance(message, str):
        return False
    # Turn punctuation into spaces so "news?" and "price," split into clean words.
    normalized = "".join(ch if ch.isalnum() else " " for ch in message.lower())
    return not REAL_TIME_KEYWORDS.isdisjoint(normalized.split())
# ✅ Search tool
# Module-level DuckDuckGo search runner, created once at import time and
# invoked through search_tool.run(...) by the request handlers.
search_tool = DuckDuckGoSearchRun()
# ✅ Chat using model with chat template and history
@GPU
def generate_full_reply(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Args:
        message: The latest user message.
        history: Earlier conversation turns, oldest first, each a dict with
            ``role`` and ``content`` keys. ``None`` or an empty value is
            treated as no history.

    Returns:
        The newly generated text with surrounding whitespace stripped.
    """
    system_prompt = "You are a helpful and concise AI assistant."
    messages = [
        {"role": "system", "content": system_prompt},
        *(history or []),
        {"role": "user", "content": message},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # return_full_text=False asks the pipeline for the continuation only, so
    # the prompt never has to be cut out of the output by string matching.
    outputs = pipe(prompt, do_sample=True, temperature=0.7, return_full_text=False)
    return outputs[0]["generated_text"].strip()
# ✅ Flask route
@app.route("/")
def home():
    # Serve the index.html template at the site root.
    page = render_template("index.html")
    return page
@app.route("/chat", methods=["POST"])
@swag_from({
    'tags': ['Chat'],
    'consumes': ['application/json'],
    'summary': 'Get assistant reply',
    'description': 'Send a message and chat history, and receive a full AI-generated response.',
    'parameters': [{
        'name': 'body',
        'in': 'body',
        'required': True,
        'schema': {
            'type': 'object',
            'properties': {
                'message': {'type': 'string', 'example': 'What is Python?'},
                'history': {
                    'type': 'array',
                    'items': {
                        'type': 'object',
                        'properties': {
                            'role': {'type': 'string', 'example': 'user'},
                            'content': {'type': 'string', 'example': 'Tell me about Python'}
                        }
                    }
                }
            },
            'required': ['message']
        }
    }],
    'responses': {
        200: {
            'description': 'Assistant reply',
            'schema': {
                'type': 'object',
                'properties': {
                    'reply': {'type': 'string'}
                }
            }
        },
        400: {
            'description': 'Malformed request body',
            'schema': {
                'type': 'object',
                'properties': {
                    'error': {'type': 'string'}
                }
            }
        }
    }
})
def chat():
    # Handle POST /chat: validate the JSON payload, then answer either from a
    # live web search or from the language model.
    # NOTE(review): documented with comments rather than a docstring because
    # flasgger can read view docstrings into the generated spec - confirm
    # before converting this to a docstring.

    # silent=True yields None for a missing, non-JSON or unparsable body so
    # the rejection below can return one consistent JSON error.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    message = payload.get("message")
    if not isinstance(message, str) or not message.strip():
        return jsonify({"error": "'message' must be a non-empty string."}), 400

    # An explicit null history is treated the same as an omitted one.
    history = payload.get("history") or []
    if not isinstance(history, list):
        return jsonify({"error": "'history' must be a list of messages."}), 400

    # Check if real-time search is needed
    if should_search(message):
        result = f"(Live info) {search_tool.run(message)}"
    else:
        result = generate_full_reply(message, history)
    return jsonify({"reply": result})
@app.route("/chat-stream", methods=["POST"])
@swag_from({
    'tags': ['Chat'],
    'consumes': ['application/json'],
    'produces': ['text/plain', 'application/json'],
    'summary': 'Stream assistant reply',
    'description': 'Send a message and history, receive AI-generated text as a stream (line by line).',
    'parameters': [{
        'name': 'body',
        'in': 'body',
        'required': True,
        'schema': {
            'type': 'object',
            'properties': {
                'message': {'type': 'string', 'example': 'Explain quantum computing.'},
                'history': {
                    'type': 'array',
                    'items': {
                        'type': 'object',
                        'properties': {
                            'role': {'type': 'string', 'example': 'user'},
                            'content': {'type': 'string', 'example': 'What is entanglement?'}
                        }
                    }
                }
            },
            'required': ['message']
        }
    }],
    'responses': {
        200: {
            'description': 'Streamed reply',
            'schema': {'type': 'string'}
        },
        400: {
            'description': 'Malformed request body',
            'schema': {
                'type': 'object',
                'properties': {
                    'error': {'type': 'string'}
                }
            }
        }
    }
})
def chat_stream():
    # Handle POST /chat-stream: validate the JSON payload, generate the whole
    # reply with the model, then send it to the client one line at a time.
    # NOTE(review): documented with comments rather than a docstring because
    # flasgger can read view docstrings into the generated spec - confirm
    # before converting this to a docstring.

    # The payload is read and validated here, before the generator runs, so
    # bad input produces a 400 instead of a broken stream.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    message = payload.get("message")
    if not isinstance(message, str) or not message.strip():
        return jsonify({"error": "'message' must be a non-empty string."}), 400

    # An explicit null history is treated the same as an omitted one.
    history = payload.get("history") or []
    if not isinstance(history, list):
        return jsonify({"error": "'history' must be a list of messages."}), 400

    def generate():
        # Unlike /chat, this endpoint does not consult the search tool;
        # every message goes to the model.
        reply = generate_full_reply(message, history)
        for line in reply.splitlines(keepends=True):
            yield line
            # Short pause between lines to pace the stream.
            time.sleep(0.05)

    return Response(generate(), mimetype='text/plain')
# ✅ Warm-up on startup
if __name__ == "__main__":
    # Run one throwaway generation before the server starts accepting
    # requests; the result is discarded.
    print("🔧 Warming up...")
    _ = generate_full_reply("Hello", [])
    # Listen on all interfaces; the port comes from PORT, defaulting to 7860.
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))