Spaces:

FrederickSundeep
/

AIChatMate

Runtime error

App Files Files Community

AIChatMate / app.py

FrederickSundeep

Update app.py

21d088c verified 10 months ago

raw

history blame contribute delete

5.72 kB

	import os
	import time
	from flask import Flask, request, render_template, jsonify, Response
	from flasgger import Swagger, swag_from
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
	from huggingface_hub import login
	from langchain_community.tools import DuckDuckGoSearchRun

	# ✅ Safe import of GPU decorator
	try:
	    from spaces import GPU
	except ImportError:
	    def GPU(func=None, **_options):
	        """No-op stand-in for ``spaces.GPU`` when the package is missing.

	        Supports both decorator forms so the same source runs with or
	        without the ``spaces`` package installed:

	        * bare: ``@GPU`` -> returns *func* unchanged;
	        * with options: ``@GPU(duration=60)`` -> returns a decorator that
	          returns the wrapped function unchanged (options are ignored).
	        """
	        if func is None:
	            # Called with keyword options only: hand back an identity decorator.
	            return lambda wrapped: wrapped
	        return func

	# Flask application; static assets and templates live in the
	# "static" and "templates" folders.
	app = Flask(__name__, static_folder="static", template_folder="templates")

	# Top-level metadata of the generated Swagger 2.0 document.
	_swagger_template = {
	    "swagger": "2.0",
	    "info": {
	        "title": "ChatMate Real-Time API",
	        "description": "LangChain + DuckDuckGo enabled AI chatbot",
	        "version": "1.0",
	    },
	}

	# Flasgger configuration: one spec exposed at /apispec.json whose
	# rule_filter accepts every rule, with the UI mounted at /apidocs/.
	_swagger_config = {
	    "headers": [],
	    "specs": [
	        {
	            "endpoint": "apispec",
	            "route": "/apispec.json",
	            "rule_filter": lambda rule: True,
	        },
	    ],
	    "static_url_path": "/flasgger_static",
	    "swagger_ui": True,
	    "specs_route": "/apidocs/",
	}

	swagger = Swagger(app, template=_swagger_template, config=_swagger_config)

	# ✅ Hugging Face login (only when a token is provided)
	# login(token=None) makes huggingface_hub ask for a token interactively,
	# which cannot be answered on a headless server, so the call is skipped
	# when the CHAT_MATE environment variable is unset or empty.
	_hf_token = os.environ.get("CHAT_MATE")
	if _hf_token:
	    login(token=_hf_token)

	# ✅ Load the LLaMA 3.1 Instruct checkpoint once, at import time.
	model_id = "meta-llama/Llama-3.1-8B-Instruct"

	# Tokenizer and causal-LM weights come from the same checkpoint id;
	# dtype and device placement are left to the library ("auto").
	tokenizer = AutoTokenizer.from_pretrained(model_id)
	model = AutoModelForCausalLM.from_pretrained(
	    model_id,
	    torch_dtype="auto",
	    device_map="auto",
	)

	# Shared text-generation pipeline, configured with max_new_tokens=512.
	pipe = pipeline(
	    "text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    max_new_tokens=512,
	)

	# ✅ Simple keyword-based check for real-time info
	REAL_TIME_KEYWORDS = {"latest", "current", "news", "today", "price", "time", "live", "trending", "update", "happening"}


	def should_search(message):
	    """Return True when *message* contains a real-time keyword as a word.

	    Matching is case-insensitive and done on whole words, so unrelated
	    words that merely contain a keyword ("concurrent", "sometimes",
	    "deliver") do not trigger a search. Simple plurals ("prices",
	    "updates") still match their keyword.

	    Args:
	        message: The user's message. Anything that is not a string
	            (including ``None``) is treated as "no search needed".

	    Returns:
	        bool: True if at least one keyword from ``REAL_TIME_KEYWORDS``
	        occurs in the message, otherwise False.
	    """
	    if not isinstance(message, str):
	        return False

	    # Turn punctuation into spaces so "news?" and "today's" split cleanly.
	    normalized = "".join(
	        ch if ch.isalnum() else " " for ch in message.lower()
	    )
	    words = set(normalized.split())

	    # Add the singular form of simple plurals ("prices" -> "price").
	    singulars = {word[:-1] for word in words if word.endswith("s")}

	    return not REAL_TIME_KEYWORDS.isdisjoint(words | singulars)

	# ✅ Search tool: one module-level DuckDuckGoSearchRun instance, created at
	# import time; the /chat route calls its run() method for real-time queries.
	search_tool = DuckDuckGoSearchRun()

	# ✅ Chat using model with chat template and history
	@GPU
	def generate_full_reply(message, history):
	    """Generate the assistant's reply to *message* given prior *history*.

	    The conversation is assembled as: a fixed system prompt, the supplied
	    history, then the new user message. It is rendered to a prompt with
	    the tokenizer's chat template and passed to the shared pipeline.

	    Args:
	        message: Text of the latest user message.
	        history: Earlier turns as an iterable of
	            ``{"role": ..., "content": ...}`` dicts. ``None`` or an empty
	            value means "no prior turns".

	    Returns:
	        str: The generated reply with surrounding whitespace stripped.
	    """
	    system_prompt = "You are a helpful and concise AI assistant."
	    messages = [
	        {"role": "system", "content": system_prompt},
	        *(history or []),
	        {"role": "user", "content": message},
	    ]
	    prompt = tokenizer.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )
	    # return_full_text=False asks the pipeline for the newly generated text
	    # only, which avoids stripping the prompt by string-splitting (that
	    # approach misbehaves if the prompt text reappears in the output).
	    generations = pipe(
	        prompt,
	        do_sample=True,
	        temperature=0.7,
	        return_full_text=False,
	    )
	    return generations[0]["generated_text"].strip()

	# ✅ Landing page route
	@app.route("/")
	def home():
	    """Render and return the ``index.html`` template for the root URL."""
	    page = render_template("index.html")
	    return page

	@app.route("/chat", methods=["POST"])
	@swag_from({
	    'tags': ['Chat'],
	    'consumes': ['application/json'],
	    'summary': 'Get assistant reply',
	    'description': 'Send a message and chat history, and receive a full AI-generated response.',
	    'parameters': [{
	        'name': 'body',
	        'in': 'body',
	        'required': True,
	        'schema': {
	            'type': 'object',
	            'properties': {
	                'message': {'type': 'string', 'example': 'What is Python?'},
	                'history': {
	                    'type': 'array',
	                    'items': {
	                        'type': 'object',
	                        'properties': {
	                            'role': {'type': 'string', 'example': 'user'},
	                            'content': {'type': 'string', 'example': 'Tell me about Python'}
	                        }
	                    }
	                }
	            },
	            'required': ['message']
	        }
	    }],
	    'responses': {
	        200: {
	            'description': 'Assistant reply',
	            'schema': {
	                'type': 'object',
	                'properties': {
	                    'reply': {'type': 'string'}
	                }
	            }
	        },
	        400: {
	            'description': 'Invalid request body',
	            'schema': {
	                'type': 'object',
	                'properties': {
	                    'error': {'type': 'string'}
	                }
	            }
	        }
	    }
	})
	def chat():
	    """Answer one chat message and return the full reply as JSON.

	    Expects a JSON object with a required string ``message`` and an
	    optional ``history`` list of ``{"role", "content"}`` dicts. Messages
	    that look like real-time queries (per ``should_search``) are answered
	    from the search tool; all others are answered by the model.

	    Returns:
	        ``{"reply": <text>}`` with status 200, or ``{"error": <reason>}``
	        with status 400 when the request body is missing or malformed.
	    """
	    # silent=True yields None instead of raising on a missing/invalid body,
	    # so every bad request gets the same JSON error shape.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return jsonify({"error": "Request body must be a JSON object."}), 400

	    message = data.get("message")
	    if not isinstance(message, str) or not message.strip():
	        return jsonify({"error": "'message' must be a non-empty string."}), 400

	    # An explicit null history is treated the same as an omitted one.
	    history = data.get("history") or []
	    if not isinstance(history, list):
	        return jsonify({"error": "'history' must be a list of messages."}), 400

	    # Check if real-time search is needed
	    if should_search(message):
	        result = f"(Live info) {search_tool.run(message)}"
	    else:
	        result = generate_full_reply(message, history)

	    return jsonify({"reply": result})

	@app.route("/chat-stream", methods=["POST"])
	@swag_from({
	    'tags': ['Chat'],
	    'consumes': ['application/json'],
	    'produces': ['text/plain', 'application/json'],
	    'summary': 'Stream assistant reply',
	    'description': 'Send a message and history, receive AI-generated text as a stream (line by line).',
	    'parameters': [{
	        'name': 'body',
	        'in': 'body',
	        'required': True,
	        'schema': {
	            'type': 'object',
	            'properties': {
	                'message': {'type': 'string', 'example': 'Explain quantum computing.'},
	                'history': {
	                    'type': 'array',
	                    'items': {
	                        'type': 'object',
	                        'properties': {
	                            'role': {'type': 'string', 'example': 'user'},
	                            'content': {'type': 'string', 'example': 'What is entanglement?'}
	                        }
	                    }
	                }
	            },
	            'required': ['message']
	        }
	    }],
	    'responses': {
	        200: {
	            'description': 'Streamed reply',
	            'schema': {'type': 'string'}
	        },
	        400: {
	            'description': 'Invalid request body',
	            'schema': {
	                'type': 'object',
	                'properties': {
	                    'error': {'type': 'string'}
	                }
	            }
	        }
	    }
	})
	def chat_stream():
	    """Answer one chat message and stream the reply as plain text.

	    Expects the same JSON body as ``/chat``: a required string ``message``
	    and an optional ``history`` list. The body is validated before the
	    response starts so that errors can still be reported with a 400.

	    The reply is generated in full by ``generate_full_reply`` and then
	    sent one line at a time; real-time search is not used on this route.

	    Returns:
	        A ``text/plain`` streaming response with status 200, or
	        ``{"error": <reason>}`` with status 400 for a bad request body.
	    """
	    # silent=True yields None instead of raising on a missing/invalid body,
	    # so every bad request gets the same JSON error shape.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        return jsonify({"error": "Request body must be a JSON object."}), 400

	    message = data.get("message")
	    if not isinstance(message, str) or not message.strip():
	        return jsonify({"error": "'message' must be a non-empty string."}), 400

	    # An explicit null history is treated the same as an omitted one.
	    history = data.get("history") or []
	    if not isinstance(history, list):
	        return jsonify({"error": "'history' must be a list of messages."}), 400

	    def generate():
	        """Yield the model's reply one line at a time."""
	        reply = generate_full_reply(message, history)

	        for chunk in reply.splitlines(keepends=True):
	            yield chunk
	            # Short pause between lines to pace delivery to the client.
	            time.sleep(0.05)

	    return Response(generate(), mimetype='text/plain')

	# ✅ Script entry point: warm up the model, then start the server
	if __name__ == "__main__":
	    print("🔧 Warming up...")
	    # One throwaway generation before serving; its result is discarded.
	    generate_full_reply("Hello", [])
	    # PORT from the environment wins; 7860 is the fallback.
	    listen_port = int(os.environ.get("PORT", 7860))
	    app.run(host="0.0.0.0", port=listen_port)