|
|
|
|
|
|
|
|
|
|
|
import json
import os
import threading
import warnings

import torch
from flask import Flask, request, Response
from transformers import (
    AutoModel,
    AutoModelForCausalLM,
    AutoTokenizer,
    TextIteratorStreamer,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face repository to load. The default is the repository id this
# script already referenced; set the MODEL_NAME environment variable to
# point at a different checkpoint without editing the file.
MODEL_NAME = os.environ.get(
    "MODEL_NAME", "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF"
)

# NOTE(review): the default repository name ends in "-GGUF". Transformers
# normally needs the concrete checkpoint file name (``gguf_file=...``) to load
# from such a repository -- confirm, and set MODEL_GGUF_FILE accordingly or
# switch MODEL_NAME to a regular (safetensors) repository.
_GGUF_FILE = os.environ.get("MODEL_GGUF_FILE")
# Only forward ``gguf_file`` when it was actually requested so the calls below
# stay identical to a plain ``from_pretrained(MODEL_NAME)`` otherwise.
_load_kwargs = {"gguf_file": _GGUF_FILE} if _GGUF_FILE else {}

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, **_load_kwargs)

# The weights are loaded exactly once, with the causal-LM class that
# ``generate`` is called on later. (An earlier ``AutoModel.from_pretrained``
# call whose result was immediately overwritten has been removed.)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # Half precision on GPU, full precision on CPU.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    **_load_kwargs,
)
model.to(device)

# WSGI application object; the routes below register themselves on it.
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_stream(prompt, max_new_tokens=300, do_sample=False,
                    temperature=0.5, top_p=0.5):
    """Yield the model's continuation of *prompt* as text chunks.

    Generation runs in a background thread that feeds a
    ``TextIteratorStreamer``; this generator yields each decoded chunk as
    soon as the streamer makes it available.

    Args:
        prompt: Full prompt text to tokenize and send to the model.
        max_new_tokens: Upper bound on the number of generated tokens.
        do_sample: Whether to sample instead of decoding greedily. The
            default (``False``) matches the previous hard-coded behaviour.
        temperature: Sampling temperature; forwarded only when ``do_sample``
            is true.
        top_p: Nucleus-sampling threshold; forwarded only when ``do_sample``
            is true.

    Yields:
        str: Decoded text chunks in generation order (the prompt itself is
        skipped; special tokens are kept).
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # NOTE(review): special tokens are deliberately left in the output, as
    # before -- confirm this is wanted for text shown directly in the chat UI.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=False,
    )

    generation_kwargs = {
        "inputs": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "streamer": streamer,
    }
    # Sampling knobs are sampling-only parameters, so passing them together
    # with do_sample=False is contradictory; forward them only when used.
    if do_sample:
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    def _run_generation():
        # Runs model.generate and guarantees the consumer loop is released.
        try:
            model.generate(**generation_kwargs)
        except Exception:
            # Without the end-of-stream signal the iterator below would wait
            # forever on its queue and the HTTP request would hang.
            streamer.end()
            raise

    thread = threading.Thread(target=_run_generation)
    thread.start()

    yield from streamer

    # The stream is exhausted, so generation has finished (or failed);
    # reap the worker thread instead of leaving it dangling.
    thread.join()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/chat", methods=["POST"])
def chat():
    """Receive a chat message and stream the model's reply as plain text.

    Expects a JSON object body with a non-empty string field ``"message"``.

    Returns:
        A streaming ``text/plain`` response fed by ``generate_stream``, or a
        400 ``text/plain`` response when the body is not a JSON object or the
        message is missing, empty, or not a string.
    """
    # silent=True yields None for a missing/invalid JSON body instead of an
    # implicit framework error, so the bad-request case is handled explicitly.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return Response(
            "Request body must be a JSON object.",
            status=400,
            mimetype="text/plain",
        )

    user_message = data.get("message", "")
    # Reject unusable messages up front rather than starting an expensive
    # generation for an empty prompt.
    if not isinstance(user_message, str) or not user_message.strip():
        return Response(
            "Field 'message' must be a non-empty string.",
            status=400,
            mimetype="text/plain",
        )

    prompt = f"""
You are a professional AI assistant.
Detect the language of the user automatically and answer in the same language.
Be clear and structured.

User: {user_message}
Assistant:
"""

    return Response(
        generate_stream(prompt),
        mimetype="text/plain",
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/")
def index():
    """Return the complete single-page HTML chat client.

    The page posts the user's text to ``/chat`` as JSON and appends the
    streamed response body to the bot's message bubble as it arrives.

    Returns:
        str: The HTML document, including its inline CSS and JavaScript.
    """
    # Differences from the previous markup:
    #  * #chat is a column flex container so that the `align-self` rules on
    #    .user / .bot actually take effect (they only apply to flex items).
    #  * The TextDecoder is used in streaming mode and flushed at the end, so
    #    a multi-byte UTF-8 character split across two network chunks is no
    #    longer decoded as replacement characters.
    return """
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>AI Chat</title>

<style>
body {
    margin:0;
    font-family: Arial;
    background-color:#343541;
    color:white;
    display:flex;
    flex-direction:column;
    height:100vh;
}

#chat {
    flex:1;
    padding:20px;
    overflow-y:auto;
    display:flex;
    flex-direction:column;
}

.message {
    margin-bottom:15px;
    padding:10px 15px;
    border-radius:10px;
    max-width:70%;
    white-space:pre-wrap;
}

.user {
    background:#0b93f6;
    align-self:flex-end;
}

.bot {
    background:#444654;
    align-self:flex-start;
}

#input-area {
    display:flex;
    padding:15px;
    background:#202123;
}

input {
    flex:1;
    padding:10px;
    border-radius:5px;
    border:none;
    font-size:16px;
}

button {
    margin-left:10px;
    padding:10px 20px;
    border:none;
    border-radius:5px;
    background:#19c37d;
    color:white;
    font-weight:bold;
    cursor:pointer;
}
</style>
</head>

<body>

<div id="chat"></div>

<div id="input-area">
<input id="message" placeholder="Escribe tu mensaje..." />
<button onclick="send()">Enviar</button>
</div>

<script>

async function send() {

    const input = document.getElementById("message");
    const text = input.value;
    if (!text) return;

    input.value = "";

    const chat = document.getElementById("chat");

    // Mostrar mensaje usuario
    const userDiv = document.createElement("div");
    userDiv.className = "message user";
    userDiv.textContent = text;
    chat.appendChild(userDiv);

    // Crear mensaje bot vacío
    const botDiv = document.createElement("div");
    botDiv.className = "message bot";
    botDiv.textContent = "";
    chat.appendChild(botDiv);

    chat.scrollTop = chat.scrollHeight;

    // Enviar al backend
    const response = await fetch("/chat", {
        method:"POST",
        headers:{"Content-Type":"application/json"},
        body: JSON.stringify({message:text})
    });

    const reader = response.body.getReader();
    const decoder = new TextDecoder();

    // Streaming en tiempo real
    while (true) {
        const {done, value} = await reader.read();
        if (done) break;
        botDiv.textContent += decoder.decode(value, {stream: true});
        chat.scrollTop = chat.scrollHeight;
    }
    botDiv.textContent += decoder.decode();
}

</script>

</body>
</html>
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start the Flask development server on all interfaces. The port is read
    # from the PORT environment variable and defaults to 7860 when unset.
    app.run(
        host="0.0.0.0",
        port=int(os.environ.get("PORT", 7860)),
    )