Spaces:

DavidBazaldua/iris

Sleeping

App Files Community

DavidBazaldua committed on Dec 5, 2025

Commit

abe09bc

verified ·

1 Parent(s): f7a0f58

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -103

app.py CHANGED Viewed

@@ -11,7 +11,6 @@ MODEL_ID = "DavidBazaldua/llama3_finetuned_transformes"
 DEVICE = "cpu"
 DTYPE = torch.float32
-# Limit CPU threads
 torch.set_num_threads(2)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@@ -28,32 +27,28 @@ DEFAULT_SYSTEM_PROMPT = (
     "You are a helpful, precise AI assistant. "
     "Always answer as briefly as possible. "
     "For fact-based questions, answer in one short sentence or a compact bullet list. "
-    "Do not add explanations, background, or restate the question unless the user explicitly asks for it. "
-    "Respond in English unless the user explicitly requests another language."
 )
 # ---------------------------------------------------------------------
 # Prompt building
 # ---------------------------------------------------------------------
 def build_prompt(system_prompt, context, history, user_message):
     messages = []
-    if system_prompt and system_prompt.strip():
         messages.append({"role": "system", "content": system_prompt})
-    if context and context.strip():
-        messages.append(
-            {
-                "role": "system",
-                "content": (
-                    "The following information is additional context. "
-                    "Use it only if it is relevant to the user's request:\n"
-                    f"{context}"
-                ),
-            }
-        )
     for user, assistant in history:
         messages.append({"role": "user", "content": user})
@@ -61,19 +56,18 @@ def build_prompt(system_prompt, context, history, user_message):
     messages.append({"role": "user", "content": user_message})
-    prompt = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
     )
-    return prompt
 def generate_answer(system_prompt, context, message, history, max_tokens, temperature, top_p):
     if history is None:
         history = []
-    if not system_prompt or system_prompt.strip() == "":
         system_prompt = DEFAULT_SYSTEM_PROMPT
     max_tokens = int(min(max_tokens, 128))
@@ -87,7 +81,7 @@ def generate_answer(system_prompt, context, message, history, max_tokens, temper
     ).to(DEVICE)
     with torch.no_grad():
-        output_tokens = model.generate(
             **inputs,
             max_new_tokens=max_tokens,
             do_sample=True,
@@ -96,90 +90,75 @@ def generate_answer(system_prompt, context, message, history, max_tokens, temper
             pad_token_id=tokenizer.eos_token_id,
         )
-    full_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
-    if full_text.startswith(prompt):
-        answer = full_text[len(prompt):].strip()
-    else:
-        answer = full_text.strip()
-    history = history + [[message, answer]]
     return answer, history
 def chat(message, history, system_prompt, context, max_tokens, temperature, top_p):
-    if history is None:
-        history = []
-    answer, updated_history = generate_answer(
-        system_prompt=system_prompt,
-        context=context,
-        message=message,
-        history=history,
-        max_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
     )
-    return "", updated_history
 # ---------------------------------------------------------------------
-# Minimalist Gradio UI
 # ---------------------------------------------------------------------
-with gr.Blocks(css="""
-/* Make the whole app look cleaner and more minimal */
-body { font-family: system-ui, -apple-system, BlinkMacSystemFont, sans-serif; }
-#chat-title { font-size: 1.6rem; font-weight: 500; margin-bottom: 0.25rem; }
-#chat-subtitle { font-size: 0.9rem; color: #666; margin-bottom: 1.5rem; }
-/* Tighten spacing around the chatbot */
-.gradio-container { max-width: 900px; margin: 0 auto; }
-""") as demo:
-    with gr.Column():
-        gr.Markdown(
-            """
-            <div id="chat-title">Iris</div>
-            <div id="chat-subtitle">Minimal chat interface for your fine-tuned Llama 3 model.</div>
-            """,
-            elem_id="header",
-        )
-        chatbot = gr.Chatbot(
-            label="",
-            height=420,
-        )
-        msg = gr.Textbox(
-            label="",
-            placeholder="Type your message and press Enter...",
-        )
-        with gr.Row():
-            send_btn = gr.Button("Send", variant="primary")
-            clear_btn = gr.Button("Clear history")
-        with gr.Accordion("Advanced settings", open=False):
             system_prompt_box = gr.Textbox(
                 label="System prompt",
                 value=DEFAULT_SYSTEM_PROMPT,
                 lines=5,
             )
             context_box = gr.Textbox(
-                label="Additional context",
-                placeholder="Optional: paste any reference text or notes you want the model to use as context.",
                 lines=6,
             )
             max_tokens_slider = gr.Slider(
-                label="Max new tokens",
                 minimum=32,
                 maximum=256,
                 value=128,
                 step=16,
             )
             temperature_slider = gr.Slider(
                 label="Temperature",
                 minimum=0.1,
@@ -187,6 +166,7 @@ body { font-family: system-ui, -apple-system, BlinkMacSystemFont, sans-serif; }
                 value=0.7,
                 step=0.1,
             )
             top_p_slider = gr.Slider(
                 label="Top-p",
                 minimum=0.1,
@@ -195,37 +175,16 @@ body { font-family: system-ui, -apple-system, BlinkMacSystemFont, sans-serif; }
                 step=0.05,
             )
-    inputs = [
-        msg,
-        chatbot,
-        system_prompt_box,
-        context_box,
-        max_tokens_slider,
-        temperature_slider,
-        top_p_slider,
-    ]
-    outputs = [msg, chatbot]
-    msg.submit(
-        fn=chat,
-        inputs=inputs,
-        outputs=outputs,
-    )
-    send_btn.click(
-        fn=chat,
-        inputs=inputs,
-        outputs=outputs,
-    )
-    clear_btn.click(
-        lambda: [],
-        None,
-        chatbot,
-        queue=False,
-    )
 if __name__ == "__main__":
     demo.launch()

 DEVICE = "cpu"
 DTYPE = torch.float32
 torch.set_num_threads(2)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
     "You are a helpful, precise AI assistant. "
     "Always answer as briefly as possible. "
     "For fact-based questions, answer in one short sentence or a compact bullet list. "
+    "Do not add explanations unless the user explicitly asks for them. "
+    "Respond in English unless the user asks otherwise."
 )
 # ---------------------------------------------------------------------
 # Prompt building
 # ---------------------------------------------------------------------
 def build_prompt(system_prompt, context, history, user_message):
     messages = []
+    if system_prompt.strip():
         messages.append({"role": "system", "content": system_prompt})
+    if context.strip():
+        messages.append({
+            "role": "system",
+            "content": (
+                "The following information is additional context. "
+                "Use it only if relevant:\n" + context
+            )
+        })
     for user, assistant in history:
         messages.append({"role": "user", "content": user})
     messages.append({"role": "user", "content": user_message})
+    return tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
     )
 def generate_answer(system_prompt, context, message, history, max_tokens, temperature, top_p):
     if history is None:
         history = []
+    if not system_prompt.strip():
         system_prompt = DEFAULT_SYSTEM_PROMPT
     max_tokens = int(min(max_tokens, 128))
     ).to(DEVICE)
     with torch.no_grad():
+        outputs = model.generate(
             **inputs,
             max_new_tokens=max_tokens,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id,
         )
+    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Try to extract only the new part
+    answer = decoded[len(prompt):].strip() if decoded.startswith(prompt) else decoded.strip()
+    history.append([message, answer])
     return answer, history
 def chat(message, history, system_prompt, context, max_tokens, temperature, top_p):
+    answer, history = generate_answer(
+        system_prompt, context, message, history, max_tokens, temperature, top_p
     )
+    return "", history
 # ---------------------------------------------------------------------
+# Minimalist ChatGPT-style UI
 # ---------------------------------------------------------------------
+CSS = """
+#container {max-width: 1200px; margin-left: auto; margin-right: auto;}
+#chat-column {width: 75%;}
+#sidebar {width: 25%; padding-left: 20px;}
+#input-row {margin-top: 12px;}
+"""
+with gr.Blocks(css=CSS) as demo:
+    gr.Markdown("<h2 style='font-weight:600;'>Iris – Your Fine-Tuned Llama 3 Assistant</h2>")
+    with gr.Row(elem_id="container"):
+        # LEFT SIDE: CHAT
+        with gr.Column(elem_id="chat-column"):
+            chatbot = gr.Chatbot(
+                height=500,
+                show_label=False,
+            )
+            with gr.Row(elem_id="input-row"):
+                msg = gr.Textbox(
+                    placeholder="Send a message...",
+                    scale=8,
+                    show_label=False,
+                )
+                send_btn = gr.Button("Send", scale=2)
+        # RIGHT SIDE: SIDEBAR
+        with gr.Column(elem_id="sidebar"):
+            gr.Markdown("### Settings")
             system_prompt_box = gr.Textbox(
                 label="System prompt",
                 value=DEFAULT_SYSTEM_PROMPT,
                 lines=5,
             )
             context_box = gr.Textbox(
+                label="Context",
+                placeholder="Optional reference text...",
                 lines=6,
             )
             max_tokens_slider = gr.Slider(
+                label="Max tokens",
                 minimum=32,
                 maximum=256,
                 value=128,
                 step=16,
             )
             temperature_slider = gr.Slider(
                 label="Temperature",
                 minimum=0.1,
                 value=0.7,
                 step=0.1,
             )
             top_p_slider = gr.Slider(
                 label="Top-p",
                 minimum=0.1,
                 step=0.05,
             )
+            clear_btn = gr.Button("Clear chat")
+    # Chat events
+    inputs = [msg, chatbot, system_prompt_box, context_box, max_tokens_slider, temperature_slider, top_p_slider]
+    outputs = [msg, chatbot]
+    msg.submit(chat, inputs, outputs)
+    send_btn.click(chat, inputs, outputs)
+    clear_btn.click(lambda: [], None, chatbot)
 if __name__ == "__main__":
     demo.launch()