Spaces:

Chompi10
/

asddsad

Runtime error

App Files Files Community

Chompi10 committed on Feb 18

Commit

973859b

verified ·

1 Parent(s): 3c2f1c9

Update app.py

Browse files

Files changed (1) hide show

app.py +193 -191

app.py CHANGED Viewed

@@ -1,245 +1,247 @@
-import os
-import torch
-from flask import Flask, request, jsonify, Response
-from transformers import AutoTokenizer, AutoModelForCausalLM
-# ===============================
-# CONFIGURACIÓN BÁSICA
-# ===============================
-app = Flask(__name__)
-MODEL_NAME = "microsoft/phi-2"
-MAX_NEW_TOKENS = 250
-print("Cargando modelo...")
-torch.set_num_threads(4)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype=torch.float32,
-    low_cpu_mem_usage=True
 )
-model.eval()
-print("Modelo cargado correctamente.")
-# ===============================
-# PROMPTS PROFESIONALES
-# ===============================
-GENERATOR_PROMPT = """
-You are a Senior Software Engineer.
-Always respond in the same language as the user.
-Generate:
-- Clean
-- Secure
-- Optimized
-- Production-ready code
-Explain briefly what the code does.
-Include best practices.
-Use markdown triple backticks for code blocks.
-"""
-ULTRA_REVIEW_PROMPT = """
-You are a Principal Software Architect and Security Engineer.
-Always respond in the same language as the user.
-Perform an EXTREMELY CRITICAL and PROFESSIONAL code review.
-Analyze:
-1. Logical correctness
-2. Edge cases
-3. Security vulnerabilities (OWASP mindset)
-4. Input validation
-5. Authentication flaws
-6. Concurrency risks
-7. Memory inefficiencies
-8. Performance bottlenecks
-9. Scalability risks
-10. SOLID violations
-11. Clean Architecture violations
-12. Code smells
-13. Dependency risks
-14. Error handling weaknesses
-15. Logging gaps
-16. Deployment risks
-Return structured sections with clear headings.
-"""
-ARCHITECTURE_ANALYSIS_PROMPT = """
-You are a Senior Software Architect.
-Always respond in the same language as the user.
-Analyze the provided system from an ARCHITECTURAL perspective.
-Evaluate:
-- Layered architecture
-- Coupling and cohesion
-- API design
-- Data flow
-- Scalability model
-- Fault tolerance
-- Cloud readiness
-- Observability
-- Caching strategy
-Return structured response with headings.
 """
-# ===============================
-# FRONTEND (CHAT UI)
-# ===============================
 @app.route("/")
 def index():
-    return Response("""
 <!DOCTYPE html>
 <html>
 <head>
 <meta charset="UTF-8">
-<title>AI Engineer Assistant</title>
-<style>
-body { margin:0; font-family:Arial; background:#343541; display:flex; justify-content:center; align-items:center; height:100vh; }
-.container { width:900px; height:92vh; background:#444654; display:flex; flex-direction:column; border-radius:10px; }
-.chat { flex:1; padding:20px; overflow-y:auto; display:flex; flex-direction:column; }
-.msg { padding:12px; border-radius:8px; margin-bottom:12px; max-width:85%; white-space:pre-wrap; }
-.user { background:#19c37d; align-self:flex-end; color:black; }
-.bot { background:#555869; align-self:flex-start; color:white; }
-.input-area { display:flex; border-top:1px solid #555; }
-input, select { padding:15px; border:none; outline:none; font-size:16px; }
-input { flex:1; }
-button { padding:15px; border:none; background:#19c37d; font-weight:bold; cursor:pointer; }
-button:hover { opacity:0.9; }
-select { background:#555869; color:white; }
-</style>
-</head>
-<body>
-<div class="container">
-    <div id="chat" class="chat"></div>
-    <div class="input-area">
-        <select id="mode">
-            <option value="generate">Generate Code</option>
-            <option value="review">Ultra Review</option>
-            <option value="architecture">Architecture Analysis</option>
-        </select>
-        <input id="input" placeholder="Write your message..." />
-        <button onclick="send()">Send</button>
-    </div>
-</div>
-<script>
-async function send(){
-    const input = document.getElementById("input");
-    const chat = document.getElementById("chat");
-    const mode = document.getElementById("mode").value;
-    const text = input.value.trim();
-    if(!text) return;
-    add(text,"user");
-    input.value="";
-    const loading = add("Thinking...","bot");
-    try{
-        const res = await fetch("/chat",{
-            method:"POST",
-            headers:{"Content-Type":"application/json"},
-            body:JSON.stringify({message:text, mode:mode})
-        });
-        const data = await res.json();
-        loading.remove();
-        add(data.response || "Error","bot");
-    }catch(e){
-        loading.remove();
-        add("Server error","bot");
-    }
 }
-function add(text,type){
-    const chat=document.getElementById("chat");
-    const div=document.createElement("div");
-    div.className="msg "+type;
-    div.innerText=text;
-    chat.appendChild(div);
-    chat.scrollTop=chat.scrollHeight;
-    return div;
 }
-document.getElementById("input").addEventListener("keypress",function(e){
-    if(e.key==="Enter"){ send(); }
-});
-</script>
-</body>
-</html>
-""", mimetype="text/html")
-# ===============================
-# API CHAT
-# ===============================
-@app.route("/chat", methods=["POST"])
-def chat():
-    try:
-        data = request.get_json()
-        user_input = data.get("message", "")
-        mode = data.get("mode", "generate")
-        if not user_input:
-            return jsonify({"error": "Empty message"}), 400
-        # Selección de prompt según modo
-        if mode == "review":
-            system_prompt = ULTRA_REVIEW_PROMPT
-        elif mode == "architecture":
-            system_prompt = ARCHITECTURE_ANALYSIS_PROMPT
-        else:
-            system_prompt = GENERATOR_PROMPT
-        prompt = f"{system_prompt}\nUser: {user_input}\nAssistant:"
-        inputs = tokenizer(prompt, return_tensors="pt")
-        with torch.inference_mode():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=MAX_NEW_TOKENS,
-                do_sample=True,
-                temperature=0.5,
-                top_p=0.9,
-                repetition_penalty=1.15,
-                no_repeat_ngram_size=3,
-                pad_token_id=tokenizer.eos_token_id
-            )
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        response = response.split("Assistant:")[-1].strip()
-        return jsonify({"response": response})
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
-# ===============================
-# START SERVER
-# ===============================
 if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860))
-    app.run(host="0.0.0.0", port=port)

+# ==============================
+# IMPORTS
+# ==============================
+from flask import Flask, request, Response  # Servidor web y streaming
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer  # Modelo IA
+import torch  # Motor de ejecución del modelo
+import threading  # Para ejecutar el modelo en segundo plano
+import json  # Para manejar datos JSON
+# ==============================
+# CONFIGURACIÓN DEL MODELO
+# ==============================
+MODEL_NAME = "microsoft/phi-2"  # Model identifier passed to from_pretrained below
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)  # Load the tokenizer for MODEL_NAME
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32  # float16 when CUDA is available, float32 otherwise
 )
+device = "cuda" if torch.cuda.is_available() else "cpu"  # Use the GPU when CUDA is available
+model.to(device)  # Move the model to the selected device
+# ==============================
+# CREAR SERVIDOR FLASK
+# ==============================
+app = Flask(__name__)  # Inicializa el servidor
+# ==============================
+# FUNCION STREAMING IA
+# ==============================
+def generate_stream(prompt):
+    """
+    Stream the model's completion for ``prompt`` as decoded text chunks.
+
+    ``model.generate`` runs in a background thread and pushes decoded text
+    into a ``TextIteratorStreamer``; this generator yields each chunk as
+    soon as it is available.
+
+    Args:
+        prompt: Full prompt text to tokenize and feed to the model.
+
+    Yields:
+        str: Newly generated text (the prompt and special tokens are skipped).
+    """
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)  # Tokenize onto the model's device
+    streamer = TextIteratorStreamer(
+        tokenizer,
+        skip_prompt=True,  # Do not echo the prompt back
+        skip_special_tokens=True  # Drop special tokens from the decoded text
+    )
+    generation_kwargs = {
+        "inputs": inputs["input_ids"],
+        "attention_mask": inputs["attention_mask"],
+        "max_new_tokens": 300,  # Upper bound on generated tokens
+        "temperature": 0.7,
+        "top_p": 0.9,
+        "do_sample": True,  # Sample instead of greedy decoding
+        "pad_token_id": tokenizer.eos_token_id,  # Pad with EOS, as the previous version of this app did
+        "streamer": streamer
+    }
+
+    def run_generation():
+        # Runs in the worker thread. If generate() fails, the streamer would
+        # never receive its stop signal and the consumer loop below would
+        # block forever, so close the stream explicitly before re-raising.
+        try:
+            model.generate(**generation_kwargs)
+        except Exception:
+            streamer.end()
+            raise
+
+    thread = threading.Thread(target=run_generation)
+    thread.start()
+    # Yield text chunks as the worker thread produces them
+    for new_text in streamer:
+        yield new_text
+    # The stream is closed at this point, so the worker is finishing; reap it
+    thread.join()
+# ==============================
+# API CHAT (POST /chat)
+# ==============================
+@app.route("/chat", methods=["POST"])
+def chat():
+    """
+    Handle POST /chat: validate the message and stream the model's reply.
+
+    Expects a JSON object body with a non-empty string ``message``.
+
+    Returns:
+        A streaming ``text/plain`` response with the generated text, or a
+        400 JSON error response when the body is not a JSON object or
+        ``message`` is missing, empty, or not a string.
+    """
+    # silent=True returns None instead of raising on a missing/invalid JSON body
+    data = request.get_json(silent=True)
+    user_message = data.get("message", "") if isinstance(data, dict) else ""
+    if not isinstance(user_message, str) or not user_message.strip():
+        return Response(
+            json.dumps({"error": "Field 'message' must be a non-empty string"}),
+            status=400,
+            mimetype="application/json"
+        )
+    # Multi-language prompt
+    prompt = f"""
+You are a professional AI assistant.
+Detect the language of the user automatically and answer in the same language.
+Be clear and structured.
+User: {user_message}
+Assistant:
 """
+    return Response(
+        generate_stream(prompt),
+        mimetype="text/plain"  # Plain-text streaming body
+    )
+# ==============================
+# FRONTEND CHAT ESTILO CHATGPT
+# ==============================
 @app.route("/")
 def index():
+    """
+    Serve the single-page chat UI.
+
+    Returns:
+        str: The complete HTML document (markup, styles and script). The
+        script posts the user's message to /chat and appends the streamed
+        reply to the page as it arrives.
+    """
+    return """
 <!DOCTYPE html>
 <html>
 <head>
 <meta charset="UTF-8">
+<title>AI Chat</title>
+<style>
+body {
+    margin:0;
+    font-family: Arial;
+    background-color:#343541;
+    color:white;
+    display:flex;
+    flex-direction:column;
+    height:100vh;
+}
+#chat {
+    flex:1;
+    padding:20px;
+    overflow-y:auto;
+    display:flex;
+    flex-direction:column;
+}
+.message {
+    margin-bottom:15px;
+    padding:10px 15px;
+    border-radius:10px;
+    max-width:70%;
+    white-space:pre-wrap;
+}
+.user {
+    background:#0b93f6;
+    align-self:flex-end;
+}
+.bot {
+    background:#444654;
+    align-self:flex-start;
 }
+#input-area {
+    display:flex;
+    padding:15px;
+    background:#202123;
 }
+input {
+    flex:1;
+    padding:10px;
+    border-radius:5px;
+    border:none;
+    font-size:16px;
+}
+button {
+    margin-left:10px;
+    padding:10px 20px;
+    border:none;
+    border-radius:5px;
+    background:#19c37d;
+    color:white;
+    font-weight:bold;
+    cursor:pointer;
+}
+</style>
+</head>
+<body>
+<div id="chat"></div>
+<div id="input-area">
+    <input id="message" placeholder="Escribe tu mensaje..." />
+    <button onclick="send()">Enviar</button>
+</div>
+<script>
+async function send() {
+    const input = document.getElementById("message");
+    const text = input.value;
+    if (!text) return;
+    input.value = "";
+    const chat = document.getElementById("chat");
+    // Mostrar mensaje usuario
+    const userDiv = document.createElement("div");
+    userDiv.className = "message user";
+    userDiv.textContent = text;
+    chat.appendChild(userDiv);
+    // Crear mensaje bot vacío
+    const botDiv = document.createElement("div");
+    botDiv.className = "message bot";
+    botDiv.textContent = "";
+    chat.appendChild(botDiv);
+    chat.scrollTop = chat.scrollHeight;
+    // Enviar al backend
+    const response = await fetch("/chat", {
+        method:"POST",
+        headers:{"Content-Type":"application/json"},
+        body: JSON.stringify({message:text})
+    });
+    // Do not stream an error body into the chat as if it were a reply
+    if (!response.ok) {
+        botDiv.textContent = "Error " + response.status;
+        return;
+    }
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    // Streaming en tiempo real
+    while (true) {
+        const {done, value} = await reader.read();
+        if (done) break;
+        // stream:true keeps multi-byte UTF-8 characters intact when split across chunks
+        botDiv.textContent += decoder.decode(value, {stream:true});
+        chat.scrollTop = chat.scrollHeight;
+    }
+    // Flush any bytes still buffered in the decoder
+    botDiv.textContent += decoder.decode();
+}
+</script>
+</body>
+</html>
+"""
+# ==============================
+# INICIAR SERVIDOR
+# ==============================
 if __name__ == "__main__":
+    import os  # Imported locally so this block does not depend on the file's import section
+
+    # Honor the PORT environment variable and fall back to 7860, the default
+    # the previous version of this app used, instead of a hard-coded 5000.
+    port = int(os.environ.get("PORT", 7860))
+    app.run(host="0.0.0.0", port=port, threaded=True)