basmala12 committed on
Commit
0a70310
·
verified ·
1 Parent(s): 73e272c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -13
app.py CHANGED
@@ -1,10 +1,10 @@
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
3
- import re
4
 
5
  MODEL_NAME = "basmala12/smollm_finetuning5"
6
 
7
- # Load model and tokenizer
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
9
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
10
 
@@ -14,6 +14,7 @@ pipe = pipeline(
14
  tokenizer=tokenizer,
15
  )
16
 
 
17
  def truncate_to_n_sentences(text: str, n: int = 2) -> str:
18
  """Force output to a maximum of N sentences."""
19
  parts = re.split(r'([.!?])', text)
@@ -33,32 +34,69 @@ def truncate_to_n_sentences(text: str, n: int = 2) -> str:
33
 
34
  return " ".join(sentences).strip()
35
 
 
36
  def respond(message, history, system_message, max_tokens, temperature, top_p):
37
- """Main chat function."""
38
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  messages.extend(history)
40
  messages.append({"role": "user", "content": message})
41
 
 
42
  prompt = tokenizer.apply_chat_template(
43
  messages,
44
  tokenize=False,
45
- add_generation_prompt=True
46
  )
47
 
 
48
  out = pipe(
49
  prompt,
50
- max_new_tokens=max_tokens,
51
- temperature=temperature,
52
- top_p=top_p,
53
  do_sample=True,
54
  )[0]["generated_text"]
55
 
56
- # Extract assistant chunk
57
  if "<|im_start|>assistant" in out:
58
  out = out.split("<|im_start|>assistant", 1)[-1]
59
  out = out.replace("<|im_end|>", "").strip()
60
 
61
- # HARD enforce 2-sentence limit
62
  out = truncate_to_n_sentences(out, n=2)
63
 
64
  return out
@@ -70,12 +108,14 @@ chatbot = gr.ChatInterface(
70
  additional_inputs=[
71
  gr.Textbox(
72
  value="Answer in 1–2 short sentences with brief logical reasoning. Do not exceed 2 sentences.",
73
- label="System message"
74
  ),
75
- gr.Slider(1, 128, value=64, step=1, label="Max new tokens"), # force brevity
76
- gr.Slider(0.1, 2.0, value=0.3, step=0.1, label="Temperature"), # lower = shorter
77
  gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
78
  ],
 
 
79
  )
80
 
81
  if __name__ == "__main__":
 
1
+ import re
2
  import gradio as gr
3
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
4
 
5
  MODEL_NAME = "basmala12/smollm_finetuning5"
6
 
7
+ # Load model & tokenizer once
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
9
  model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
10
 
 
14
  tokenizer=tokenizer,
15
  )
16
 
17
+
18
  def truncate_to_n_sentences(text: str, n: int = 2) -> str:
19
  """Force output to a maximum of N sentences."""
20
  parts = re.split(r'([.!?])', text)
 
34
 
35
  return " ".join(sentences).strip()
36
 
37
+
38
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a short (max 2-sentence) assistant reply for gr.ChatInterface.

    Args:
        message: Current user message (str).
        history: Prior turns as list[{'role': 'user'|'assistant', 'content': str}]
            (ChatInterface with type='messages').
        system_message: User-configurable system prompt (from additional_inputs).
        max_tokens: Cap on newly generated tokens (from additional_inputs).
        temperature: Sampling temperature (from additional_inputs).
        top_p: Nucleus-sampling cutoff (from additional_inputs).

    Returns:
        The assistant reply as a plain string, hard-truncated to 2 sentences.
    """

    # Few-shot prompt to enforce behavior (short, reasoned answers).
    few_shot_prompt = """
You are a concise reasoning assistant.

Rules:
1. ALWAYS answer the user's LAST question only.
2. Give exactly 1–2 short sentences.
3. Provide brief, correct reasoning.
4. Never repeat earlier answers.
5. Never invent scientific facts.

Examples:

User: Why do we sweat?
Assistant: We sweat to cool the body because evaporation removes heat from the skin. This helps regulate temperature.

User: Why does metal feel colder than wood?
Assistant: Metal pulls heat from your skin faster because it conducts heat better than wood. This faster heat transfer makes it feel colder.

User: Why do birds fly in a V formation?
Assistant: Birds fly in a V to save energy because each bird rides the lift from the bird ahead. This reduces effort for the whole group.
""".strip()

    # Build messages: few-shot rules first, then the user-configurable system
    # message, then the conversation so far.
    messages = [
        {"role": "system", "content": few_shot_prompt},
        {"role": "system", "content": system_message},
    ]
    # Keep only the keys chat templates understand; newer Gradio versions may
    # attach extra fields (e.g. 'metadata') that some templates reject.
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": message})

    # Render the conversation with the model's chat template.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Generate (sliders arrive as numbers but are cast defensively).
    out = pipe(
        prompt,
        max_new_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=float(top_p),
        do_sample=True,
    )[0]["generated_text"]

    # text-generation pipelines return prompt + continuation. Isolate the
    # assistant chunk via the ChatML marker when present; otherwise fall back
    # to stripping the rendered prompt prefix so the reply never echoes the
    # whole conversation.
    if "<|im_start|>assistant" in out:
        out = out.split("<|im_start|>assistant", 1)[-1]
    elif out.startswith(prompt):
        out = out[len(prompt):]
    out = out.replace("<|im_end|>", "").strip()

    # HARD enforce the 1–2 sentence limit regardless of what the model emits.
    out = truncate_to_n_sentences(out, n=2)

    return out
 
108
  additional_inputs=[
109
  gr.Textbox(
110
  value="Answer in 1–2 short sentences with brief logical reasoning. Do not exceed 2 sentences.",
111
+ label="System message",
112
  ),
113
+ gr.Slider(1, 128, value=64, step=1, label="Max new tokens"),
114
+ gr.Slider(0.1, 2.0, value=0.3, step=0.1, label="Temperature"),
115
  gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),
116
  ],
117
+ title="SmolLM2 – Short Reasoning Chat",
118
+ description="Fine-tuned SmolLM2 (basmala12/smollm_finetuning5) that answers with 1–2 short sentences and brief reasoning.",
119
  )
120
 
121
  if __name__ == "__main__":