Spaces:

DavidBazaldua
/

iris

Sleeping

App Files Files Community

DavidBazaldua committed on Dec 5, 2025

Commit

d5b6e77

verified ·

1 Parent(s): b84dcec

Update app.py

Browse files

Files changed (1) hide show

app.py +196 -62

app.py CHANGED Viewed

@@ -1,70 +1,204 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
     """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
     """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
 if __name__ == "__main__":
     demo.launch()

+import os
+import torch
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# 1. Basic configuration: model id, target device and tensor dtype
+MODEL_ID = "DavidBazaldua/llama-iris-finetuned"
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # GPU when available, otherwise CPU
+DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32  # float16 on GPU, float32 on CPU
+# 2. Load the tokenizer and the model (runs once, at import time)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=DTYPE,
+    device_map="auto" if DEVICE == "cuda" else None
+)
+if DEVICE == "cpu":
+    model.to(DEVICE)  # no device_map was passed on CPU, so place the model explicitly
+# 3. Default system prompt (customize as needed); used when the UI field is left blank
+DEFAULT_SYSTEM_PROMPT = (
+    "You are Iris, a helpful, kind, and concise AI assistant. "
+    "You answer in the same language as the user and you explain things clearly. "
+    "If the user is Miriam, you can hablarle en confianza como amiga :)"
+)
+# 4. Build the prompt from the system prompt, extra context and chat history
+def build_prompt(system_prompt, context, history, user_message):
     """
+    Assemble the plain-text chat prompt that is fed to the model.
+
+    system_prompt: system instruction text; skipped when empty.
+    context: extra context pasted by the user (documents, notes, etc.);
+        skipped when empty.
+    history: (user, assistant) pairs, or None for an empty chat. A pair
+        whose assistant reply is None contributes only its user part.
+    user_message: the user's current message.
+
+    Returns the prompt string, ending with an open <|assistant|> tag.
     """
+    # Tag-delimited sections, joined into a single string at the end.
+    prompt_parts = []
+    # System instructions
+    if system_prompt:
+        prompt_parts.append(f"<|system|>\n{system_prompt}\n")
+    # Extra context, passed as a second system section
+    if context:
+        prompt_parts.append(
+            "<|system|>\nThe following is extra context that may be useful. "
+            "Use it to answer the user if relevant:\n"
+            f"{context}\n"
+        )
+    # Previous turns
+    for old_user, old_assistant in history or []:
+        prompt_parts.append(f"<|user|>\n{old_user}\n")
+        # An unanswered turn has no reply yet; formatting None would put
+        # the literal text "None" into the prompt.
+        if old_assistant is not None:
+            prompt_parts.append(f"<|assistant|>\n{old_assistant}\n")
+    # Current message, followed by an open assistant tag for the model to complete
+    prompt_parts.append(f"<|user|>\n{user_message}\n")
+    prompt_parts.append("<|assistant|>\n")
+    return "".join(prompt_parts)
+# 5. Generation function
+def generate_answer(system_prompt, context, message, history, max_tokens, temperature, top_p):
+    """
+    Generate one reply and return it together with the extended history.
+
+    system_prompt: system instructions; DEFAULT_SYSTEM_PROMPT is used when
+        this is None or blank.
+    context: optional extra context forwarded to build_prompt.
+    message: the user's current message.
+    history: list of [user, assistant] pairs, or None.
+    max_tokens, temperature, top_p: sampling settings, cast to int / float
+        before being passed to model.generate.
+
+    Returns (answer, history), where history is a new list with
+    [message, answer] appended; the list passed in is not modified.
+    """
+    if system_prompt is None or system_prompt.strip() == "":
+        system_prompt = DEFAULT_SYSTEM_PROMPT
+    prompt = build_prompt(system_prompt, context, history, message)
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        add_special_tokens=False
+    ).to(DEVICE)
+    with torch.no_grad():
+        output_tokens = model.generate(
+            **inputs,
+            max_new_tokens=int(max_tokens),
+            do_sample=True,
+            temperature=float(temperature),
+            top_p=float(top_p),
+            pad_token_id=tokenizer.eos_token_id
+        )
+    # The output sequence starts with the prompt tokens. Drop them by length
+    # rather than searching the decoded text for "<|assistant|>", which picks
+    # the wrong span whenever the completion itself contains that tag.
+    prompt_length = inputs["input_ids"].shape[-1]
+    new_tokens = output_tokens[0][prompt_length:]
+    # skip_special_tokens=True removes every special token, not only "</s>".
+    answer = tokenizer.decode(new_tokens, skip_special_tokens=True)
+    # Kept in case "</s>" appears as plain text rather than as a special token.
+    answer = answer.replace("</s>", "").strip()
+    # Append the finished turn to a copy of the history
+    history = list(history or []) + [[message, answer]]
+    return answer, history
+# 6. Wrapper for Gradio (works on the Chatbot history)
+def chat_fn(message, history, system_prompt, context, max_tokens, temperature, top_p):
+    """
+    Adapt Gradio's argument order to generate_answer.
+
+    A history of None is treated as an empty list. Returns the
+    (answer, history) pair produced by generate_answer.
+    """
+    turns = [] if history is None else history
+    answer, turns = generate_answer(system_prompt, context, message, turns, max_tokens, temperature, top_p)
+    return answer, turns
+# 7. Build the Gradio UI
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown(
+        """
+        # ✨ Iris – Tu modelo finetuneado
+        Chatea con tu modelo, agrega contexto y ajusta el comportamiento del sistema.
+        """
+    )
+    with gr.Row():
+        # Left column: conversation and message input
+        with gr.Column(scale=3):
+            chatbot = gr.Chatbot(
+                label="Chat con Iris",
+                height=450
+            )
+            msg = gr.Textbox(
+                label="Mensaje",
+                placeholder="Escribe aquí tu pregunta...",
+            )
+            send_btn = gr.Button("Enviar ✉️", variant="primary")
+        # Right column: system prompt, extra context and sampling settings
+        with gr.Column(scale=2):
+            system_prompt_box = gr.Textbox(
+                label="System prompt",
+                value=DEFAULT_SYSTEM_PROMPT,
+                lines=6
+            )
+            context_box = gr.Textbox(
+                label="Contexto adicional (opcional)",
+                placeholder="Pega aquí notas, documentos o datos que quieras que Iris use como contexto.",
+                lines=10
+            )
+            max_tokens_slider = gr.Slider(
+                label="Máx. tokens de respuesta",
+                minimum=64,
+                maximum=2048,
+                value=512,
+                step=32
+            )
+            temperature_slider = gr.Slider(
+                label="Temperature",
+                minimum=0.1,
+                maximum=1.5,
+                value=0.7,
+                step=0.1
+            )
+            top_p_slider = gr.Slider(
+                label="Top-p",
+                minimum=0.1,
+                maximum=1.0,
+                value=0.9,
+                step=0.05
+            )
+            clear_btn = gr.Button("Limpiar historial 🧹")
+    # Events
+    def user_submit(user_message, chat_history):
+        """Clear the textbox and show the user's message as a pending turn."""
+        if chat_history is None:
+            chat_history = []
+        return "", chat_history + [[user_message, None]]
+    def bot_reply(chat_history, system_prompt, context, max_tokens, temperature, top_p):
+        """Answer the pending turn appended by user_submit; return the new history."""
+        # Nothing to do unless the last turn is still waiting for its reply.
+        if not chat_history or chat_history[-1][1] is not None:
+            return chat_history
+        # The textbox has already been cleared, so the message is read back
+        # from the pending turn. That turn is left out of the history given
+        # to chat_fn, which appends the completed [message, answer] pair.
+        pending_message = chat_history[-1][0]
+        _, updated_history = chat_fn(
+            pending_message,
+            chat_history[:-1],
+            system_prompt,
+            context,
+            max_tokens,
+            temperature,
+            top_p,
+        )
+        return updated_history
+    # Inputs of the generation step; the Chatbot is its only output.
+    reply_inputs = [chatbot, system_prompt_box, context_box, max_tokens_slider, temperature_slider, top_p_slider]
+    # The button and the Enter key share one flow: echo the message, then answer it.
+    send_btn.click(
+        fn=user_submit,
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=False
+    ).then(
+        fn=bot_reply,
+        inputs=reply_inputs,
+        outputs=chatbot,
+    )
+    msg.submit(
+        fn=user_submit,
+        inputs=[msg, chatbot],
+        outputs=[msg, chatbot],
+        queue=False
+    ).then(
+        fn=bot_reply,
+        inputs=reply_inputs,
+        outputs=chatbot,
+    )
+    # Reset the conversation
+    clear_btn.click(
+        lambda: [],
+        None,
+        chatbot
+    )
+# 8. Launch the app (HF Spaces runs it with `python app.py`)
 if __name__ == "__main__":
     demo.launch()