Spaces:

Chompi10
/

asddsad

Runtime error

App Files Files Community

asddsad / app.py

Chompi10

Update app.py

2980a05 verified 2 days ago

raw

history blame contribute delete

5.62 kB

	# ==============================
	# IMPORTS
	# ==============================
	import json  # JSON handling
	import os
	import threading  # Runs model generation in the background
	import warnings

	import torch  # Model execution engine
	from flask import Flask, Response, request  # Web server and streaming
	from transformers import (  # Model, tokenizer and token streamer
	    AutoModel,
	    AutoModelForCausalLM,
	    AutoTokenizer,
	    TextIteratorStreamer,
	)


	# ==============================
	# CONFIGURACIÓN DEL MODELO
	# ==============================
	# Hugging Face model id shared by the tokenizer and the weights. It was only
	# present as a commented-out line before, which made every use below raise
	# NameError. The MODEL_NAME environment variable can override the default.
	MODEL_NAME = os.environ.get("MODEL_NAME", "microsoft/phi-2")

	# Resolve the device first so the dtype choice below follows it.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_NAME,
	    # Half precision only when a GPU is available; full precision on CPU.
	    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
	)
	model.to(device)  # Move the weights to the selected device



	# ==============================
	# CREAR SERVIDOR FLASK
	# ==============================

	app = Flask(import_name=__name__)  # Create the Flask application object


	# ==============================
	# FUNCION STREAMING IA
	# ==============================

	def generate_stream(prompt):
	    """
	    Stream the model's completion for ``prompt`` as text chunks.

	    ``model.generate`` runs on a background thread and feeds a
	    ``TextIteratorStreamer``; this generator yields each decoded chunk as
	    the streamer hands it over.

	    Args:
	        prompt: Full prompt text passed to the tokenizer.

	    Yields:
	        Non-empty decoded text chunks of the completion. The prompt itself
	        is not echoed back.
	    """
	    # Tokenize and move the tensors to the same device as the model.
	    inputs = tokenizer(prompt, return_tensors="pt").to(device)

	    streamer = TextIteratorStreamer(
	        tokenizer,
	        skip_prompt=True,  # Do not repeat the prompt in the output
	        # Strip special tokens from the decoded text; this was False even
	        # though the original comment said special tokens are removed.
	        skip_special_tokens=True,
	    )

	    generation_kwargs = {
	        "inputs": inputs["input_ids"],
	        "attention_mask": inputs["attention_mask"],
	        "max_new_tokens": 300,  # Upper bound on generated tokens
	        "temperature": 0.5,
	        "top_p": 0.5,
	        # Sampling must be enabled for temperature/top_p to take effect;
	        # this was False, contradicting the settings above.
	        "do_sample": True,
	        "streamer": streamer,
	    }

	    # Daemon thread so a stuck generation cannot block interpreter shutdown.
	    worker = threading.Thread(
	        target=model.generate,
	        kwargs=generation_kwargs,
	        daemon=True,
	    )
	    worker.start()

	    for new_text in streamer:
	        # Skip empty chunks so no zero-length writes reach the client.
	        if new_text:
	            yield new_text

	    # The streamer is exhausted, so generation has finished; reap the thread.
	    worker.join()


	# ==============================
	# API CHAT (POST /chat)
	# ==============================

	@app.route("/chat", methods=["POST"])
	def chat():
	    """
	    Handle POST /chat: read the user's message and stream the model's reply.

	    Expects a JSON object body with a string ``message`` field.

	    Returns:
	        A streaming ``text/plain`` response produced by ``generate_stream``,
	        or a 400 JSON error when the body is not a JSON object or the
	        message is missing, not a string, or blank.
	    """
	    # silent=True yields None instead of raising on a missing or invalid
	    # JSON body, so all bad input goes through the single check below.
	    data = request.get_json(silent=True)
	    user_message = data.get("message", "") if isinstance(data, dict) else None

	    if not isinstance(user_message, str) or not user_message.strip():
	        error_body = json.dumps(
	            {"error": "Expected a JSON object with a non-empty string 'message'."}
	        )
	        return Response(error_body, status=400, mimetype="application/json")

	    # Multi-language prompt
	    prompt = f"""
	You are a professional AI assistant.
	Detect the language of the user automatically and answer in the same language.
	Be clear and structured.

	User: {user_message}
	Assistant:
	"""

	    return Response(
	        generate_stream(prompt),
	        mimetype="text/plain",  # Plain-text streaming body
	    )


	# ==============================
	# FRONTEND CHAT ESTILO CHATGPT
	# ==============================

	@app.route("/")
	def index():
	    """
	    Serve the single-page chat frontend.

	    The page posts the typed message to /chat as JSON and appends the
	    streamed reply to the bot bubble as it arrives.

	    Returns:
	        The complete HTML document (markup, CSS and JavaScript) as a string.
	    """
	    # The script decodes with {stream: true} and flushes at the end so that
	    # multi-byte UTF-8 characters split across network chunks are not
	    # corrupted, and it reports non-2xx responses instead of streaming them
	    # into the chat bubble.
	    return """
	<!DOCTYPE html>
	<html>
	<head>
	<meta charset="UTF-8">
	<title>AI Chat</title>

	<style>
	body {
	margin:0;
	font-family: Arial;
	background-color:#343541;
	color:white;
	display:flex;
	flex-direction:column;
	height:100vh;
	}

	#chat {
	flex:1;
	padding:20px;
	overflow-y:auto;
	}

	.message {
	margin-bottom:15px;
	padding:10px 15px;
	border-radius:10px;
	max-width:70%;
	white-space:pre-wrap;
	}

	.user {
	background:#0b93f6;
	align-self:flex-end;
	}

	.bot {
	background:#444654;
	align-self:flex-start;
	}

	#input-area {
	display:flex;
	padding:15px;
	background:#202123;
	}

	input {
	flex:1;
	padding:10px;
	border-radius:5px;
	border:none;
	font-size:16px;
	}

	button {
	margin-left:10px;
	padding:10px 20px;
	border:none;
	border-radius:5px;
	background:#19c37d;
	color:white;
	font-weight:bold;
	cursor:pointer;
	}
	</style>
	</head>

	<body>

	<div id="chat"></div>

	<div id="input-area">
	<input id="message" placeholder="Escribe tu mensaje..." />
	<button onclick="send()">Enviar</button>
	</div>

	<script>

	async function send() {

	const input = document.getElementById("message");
	const text = input.value;
	if (!text) return;

	input.value = "";

	const chat = document.getElementById("chat");

	// Mostrar mensaje usuario
	const userDiv = document.createElement("div");
	userDiv.className = "message user";
	userDiv.textContent = text;
	chat.appendChild(userDiv);

	// Crear mensaje bot vacío
	const botDiv = document.createElement("div");
	botDiv.className = "message bot";
	botDiv.textContent = "";
	chat.appendChild(botDiv);

	chat.scrollTop = chat.scrollHeight;

	// Enviar al backend
	const response = await fetch("/chat", {
	method:"POST",
	headers:{"Content-Type":"application/json"},
	body: JSON.stringify({message:text})
	});

	if (!response.ok) {
	botDiv.textContent = "Error " + response.status;
	return;
	}

	const reader = response.body.getReader();
	const decoder = new TextDecoder();

	// Streaming en tiempo real
	while (true) {
	const {done, value} = await reader.read();
	if (done) break;
	botDiv.textContent += decoder.decode(value, {stream: true});
	chat.scrollTop = chat.scrollHeight;
	}
	botDiv.textContent += decoder.decode();
	}

	</script>

	</body>
	</html>
	"""


	# ==============================
	# INICIAR SERVIDOR
	# ==============================

	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
	    app.run(host="0.0.0.0", port=int(os.getenv("PORT", 7860)))