Spaces:
Runtime error
Runtime error
| import os | |
| import time | |
| from flask import Flask, request, render_template, jsonify, Response | |
| from flasgger import Swagger, swag_from | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
| from huggingface_hub import login | |
| from langchain_community.tools import DuckDuckGoSearchRun | |
# Import the Hugging Face Spaces GPU decorator when it is available; otherwise
# fall back to a no-op so the app still runs where `spaces` is not installed.
try:
    from spaces import GPU
except ImportError:
    def GPU(func=None, **_options):
        """No-op stand-in for ``spaces.GPU``.

        Works both as a bare decorator (``@GPU``) and in the parameterized
        form (``@GPU(duration=60)``). In either case the decorated function
        is returned unchanged.
        """
        if callable(func):
            # Bare usage: ``@GPU`` passes the function directly.
            return func
        # Parameterized usage: return a decorator that leaves the function as-is.
        return lambda wrapped: wrapped
# Flask application plus Swagger (flasgger) API-docs setup.
app = Flask(__name__, static_folder="static", template_folder="templates")

# Top-level metadata shown in the generated API documentation.
_swagger_template = {
    "swagger": "2.0",
    "info": {
        "title": "ChatMate Real-Time API",
        "description": "LangChain + DuckDuckGo enabled AI chatbot",
        "version": "1.0",
    },
}

# Flasgger configuration: one spec at /apispec.json covering every rule,
# with the Swagger UI served under /apidocs/.
_swagger_config = {
    "headers": [],
    "specs": [
        {
            "endpoint": 'apispec',
            "route": '/apispec.json',
            "rule_filter": lambda rule: True,
        },
    ],
    "static_url_path": "/flasgger_static",
    "swagger_ui": True,
    "specs_route": "/apidocs/",
}

swagger = Swagger(app, template=_swagger_template, config=_swagger_config)
# Log in to Hugging Face only when a token is configured. Calling
# login(token=None) would fall back to an interactive prompt, which cannot be
# answered in a headless deployment.
_hf_token = os.environ.get("CHAT_MATE")
if _hf_token:
    login(token=_hf_token)
# Load the LLaMA 3.1 Instruct tokenizer and model, then wrap them in a
# text-generation pipeline capped at 512 new tokens per call.
model_id = "meta-llama/Llama-3.1-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="auto",
    device_map="auto",
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
)
# Simple keyword-based check for requests that need real-time information.
REAL_TIME_KEYWORDS = {"latest", "current", "news", "today", "price", "time", "live", "trending", "update", "happening"}


def should_search(message):
    """Return True when *message* contains a real-time keyword as a whole word.

    Matching is case-insensitive and works on whole words, so "sometimes" or
    "deliver" no longer trigger on the embedded "time" / "live". A trailing
    "s" is tolerated so simple plurals such as "prices" or "updates" still
    match.

    Args:
        message: The user's message. Non-string values (e.g. ``None``) are
            treated as containing no keywords.

    Returns:
        bool: True if any keyword is present, otherwise False.
    """
    if not isinstance(message, str):
        return False

    # Turn punctuation into spaces so "news?" and "today's" split cleanly.
    normalized = "".join(ch if ch.isalnum() else " " for ch in message.lower())

    for word in normalized.split():
        if word in REAL_TIME_KEYWORDS:
            return True
        # Accept simple plural forms of a keyword.
        if word.endswith("s") and word[:-1] in REAL_TIME_KEYWORDS:
            return True
    return False
# DuckDuckGo search tool; its run() method is called with the user's message
# when a real-time lookup is needed.
search_tool = DuckDuckGoSearchRun()
# Chat with the model using its chat template and the conversation history.
def generate_full_reply(message, history):
    """Generate the assistant's reply to *message* given the prior conversation.

    Args:
        message: The latest user message.
        history: Earlier turns, inserted between the system prompt and the new
            user message (presumably ``{"role": ..., "content": ...}`` dicts,
            as the chat template expects). ``None`` or an empty list means a
            fresh conversation.

    Returns:
        str: The generated reply with surrounding whitespace stripped.
    """
    system_prompt = "You are a helpful and concise AI assistant."
    messages = [
        {"role": "system", "content": system_prompt},
        # `history or []` tolerates a client sending `"history": null`.
        *(history or []),
        {"role": "user", "content": message},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Ask the pipeline for the completion only, instead of splitting the prompt
    # back out of the combined text afterwards.
    outputs = pipe(prompt, do_sample=True, temperature=0.7, return_full_text=False)
    return outputs[0]["generated_text"].strip()
# Flask route: landing page.
# NOTE(review): no route decorator is visible on this function in this view;
# confirm it is registered with `app`.
def home():
    """Render and return the ``index.html`` template."""
    page = render_template("index.html")
    return page
# NOTE(review): no route decorator is visible on this function in this view;
# confirm it is registered with `app`.
def chat():
    """Answer one chat message and return the reply as JSON.

    Expects a JSON object body with a ``message`` string and an optional
    ``history`` list. Messages that look like real-time queries are answered
    with the search tool; everything else goes to the language model.

    Returns:
        A JSON response ``{"reply": ...}`` on success, or
        ``{"error": ...}`` with HTTP 400 when the body is not a JSON object
        or ``message`` is missing or blank.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    message = data.get("message")
    if not isinstance(message, str) or not message.strip():
        return jsonify({"error": "Field 'message' must be a non-empty string."}), 400

    # Treat an explicit `"history": null` the same as an absent history.
    history = data.get("history") or []

    # Check if real-time search is needed
    if should_search(message):
        result = f"(Live info) {search_tool.run(message)}"
    else:
        result = generate_full_reply(message, history)
    return jsonify({"reply": result})
# NOTE(review): no route decorator is visible on this function in this view;
# confirm it is registered with `app`.
def chat_stream():
    """Answer one chat message and stream the reply back as plain text.

    Expects a JSON object body with a ``message`` string and an optional
    ``history`` list. The full reply is generated first and then sent line by
    line with a short pause between lines.

    Returns:
        A streaming ``text/plain`` response on success, or a JSON
        ``{"error": ...}`` with HTTP 400 when the body is not a JSON object
        or ``message`` is missing or blank.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object."}), 400

    message = data.get("message")
    if not isinstance(message, str) or not message.strip():
        return jsonify({"error": "Field 'message' must be a non-empty string."}), 400

    # Treat an explicit `"history": null` the same as an absent history.
    history = data.get("history") or []

    def generate():
        # The live-search branch is not used on the streaming path; the reply
        # always comes from the model.
        reply = generate_full_reply(message, history)
        for line in reply.splitlines(keepends=True):
            yield line
            # Brief pause between lines to pace the streamed output.
            time.sleep(0.05)

    return Response(generate(), mimetype='text/plain')
# When run as a script: warm up the model with one throwaway generation, then
# start the Flask server on all interfaces.
if __name__ == "__main__":
    print("π§ Warming up...")
    generate_full_reply("Hello", [])  # result is intentionally discarded
    # Port comes from the PORT environment variable, defaulting to 7860.
    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=port)