Spaces:

TobDeBer
/

SmolTransform

Sleeping

App Files Community

TobDeBer committed on Dec 17, 2025

Commit

0bd8d77

1 Parent(s): 0471f91

use Smol

Browse files

Files changed (2) hide show

app.py +19 -27
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -4,8 +4,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import time
 import random
-# Model configuration - using TinyLlama for efficient CPU inference
-MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
 # Global variables for model components
 tokenizer = None
@@ -44,12 +44,12 @@ def load_model():
         return f"❌ Error loading model: {str(e)}"
 def format_prompt(prompt, system_prompt=None):
-    """Format the prompt for chat-style models"""
     if system_prompt:
-        formatted = f"<|system|>\n{system_prompt}\n<|user|>\n{prompt}\n<|assistant|>"
-    else:
-        formatted = f"<|user|>\n{prompt}\n<|assistant|>"
-    return formatted
 def generate_text(
     prompt,
@@ -63,7 +63,7 @@ def generate_text(
     global text_generator
     if text_generator is None:
-        return "⚠️ Please load the model first using the 'Load Model' button."
     if not prompt.strip():
         return "⚠️ Please enter a prompt."
@@ -72,12 +72,6 @@ def generate_text(
         # Format the prompt
         formatted_prompt = format_prompt(prompt, system_prompt)
-        # Update pipeline parameters
-        text_generator.max_new_tokens = max_length
-        text_generator.temperature = temperature
-        text_generator.top_p = top_p
-        text_generator.repetition_penalty = repetition_penalty
         # Generate response
         start_time = time.time()
         result = text_generator(
@@ -88,19 +82,14 @@ def generate_text(
             repetition_penalty=repetition_penalty,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id,
-            eos_token_id=tokenizer.eos_token_id
         )
         generation_time = time.time() - start_time
-        # Extract the generated text
-        generated_text = result[0]["generated_text"]
-        # Extract only the assistant's response
-        if "<|assistant|>" in generated_text:
-            response = generated_text.split("<|assistant|>")[-1].strip()
-        else:
-            response = generated_text
         # Format output with metadata
         output = f"**Response:**\n{response}\n\n---\n*Generated in {generation_time:.2f} seconds*"
@@ -136,7 +125,7 @@ with gr.Blocks() as demo:
         # 🤖 Smol LLM Inference GUI
         **Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)** -
-        Efficient text generation using TinyLlama
         This application runs a compact language model locally for text generation.
         Perfect for chat, completion tasks, and creative writing.
@@ -150,11 +139,11 @@ with gr.Blocks() as demo:
                 gr.Markdown("### 📦 Model Management")
                 model_status = gr.Textbox(
                     label="Model Status",
-                    value="Model not loaded. Click 'Load Model' to start.",
                     interactive=False
                 )
                 load_btn = gr.Button(
-                    "🔄 Load Model",
                     variant="primary",
                     size="lg"
                 )
@@ -288,12 +277,15 @@ with gr.Blocks() as demo:
         api_visibility="public"
     )
 # Launch the application
 demo.launch(
     theme=custom_theme,
     footer_links=[
         {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
-        {"label": "TinyLlama Model", "url": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0"},
         {"label": "Gradio", "url": "https://gradio.app"}
     ],
     share=False,

 import time
 import random
+# Model configuration - using SmolLM2 for efficient inference
+MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
 # Global variables for model components
 tokenizer = None
         return f"❌ Error loading model: {str(e)}"
 def format_prompt(prompt, system_prompt=None):
+    """Format the prompt for chat-style models using tokenizer's template"""
+    messages = []
     if system_prompt:
+        messages.append({"role": "system", "content": system_prompt})
+    messages.append({"role": "user", "content": prompt})
+    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 def generate_text(
     prompt,
     global text_generator
     if text_generator is None:
+        return "⚠️ Please wait for the model to finish loading..."
     if not prompt.strip():
         return "⚠️ Please enter a prompt."
         # Format the prompt
         formatted_prompt = format_prompt(prompt, system_prompt)
         # Generate response
         start_time = time.time()
         result = text_generator(
             repetition_penalty=repetition_penalty,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id,
+            eos_token_id=tokenizer.eos_token_id,
+            return_full_text=False
         )
         generation_time = time.time() - start_time
+        # Extract the generated response directly
+        response = result[0]["generated_text"].strip()
         # Format output with metadata
         output = f"**Response:**\n{response}\n\n---\n*Generated in {generation_time:.2f} seconds*"
         # 🤖 Smol LLM Inference GUI
         **Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)** -
+        Efficient text generation using SmolLM2-135M
         This application runs a compact language model locally for text generation.
         Perfect for chat, completion tasks, and creative writing.
                 gr.Markdown("### 📦 Model Management")
                 model_status = gr.Textbox(
                     label="Model Status",
+                    value="Model is loading automatically...",
                     interactive=False
                 )
                 load_btn = gr.Button(
+                    "🔄 Reload Model",
                     variant="primary",
                     size="lg"
                 )
         api_visibility="public"
     )
+# Auto-load the model at startup
+load_model()
 # Launch the application
 demo.launch(
     theme=custom_theme,
     footer_links=[
         {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
+        {"label": "SmolLM2 Model", "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct"},
         {"label": "Gradio", "url": "https://gradio.app"}
     ],
     share=False,

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 torch
 transformers

 torch
 transformers
+gradio