TobDeBer committed on
Commit
093459e
Β·
verified Β·
1 Parent(s): ccdf284

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +301 -0
app.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio front-end for CPU inference with a small chat LLM."""
import random
import time

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Model configuration - TinyLlama is small enough for CPU-only inference.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Lazily-populated globals shared by the UI callbacks below; they stay None
# until load_model() succeeds.
tokenizer = None
model = None
text_generator = None
def load_model():
    """Load the TinyLlama tokenizer/model and build the generation pipeline.

    Populates the module-level ``tokenizer``, ``model`` and ``text_generator``
    globals used by the Gradio callbacks.

    Returns:
        str: a user-facing status message (success or error text).
    """
    global tokenizer, model, text_generator
    try:
        print(f"Loading model: {MODEL_NAME}")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

        # Some chat tokenizers ship without a pad token; fall back to EOS
        # *before* the pipeline is created so padding is configured correctly
        # (the original set it afterwards, too late for the pipeline).
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # float32 is the safe dtype on CPU
            device_map="auto",  # NOTE(review): requires `accelerate` — confirm it is installed
        )

        # Default sampling settings; generate_text() overrides them per call.
        text_generator = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.95,
            do_sample=True,
        )

        return "✅ Model loaded successfully!"
    except Exception as e:
        return f"❌ Error loading model: {str(e)}"
def format_prompt(prompt, system_prompt=None):
    """Wrap a user prompt in the TinyLlama chat template.

    An optional system instruction is prepended when supplied; the result
    always ends with the assistant tag so generation continues in the
    assistant's voice.
    """
    segments = []
    if system_prompt:
        segments.append(f"<|system|>\n{system_prompt}")
    segments.append(f"<|user|>\n{prompt}")
    segments.append("<|assistant|>")
    return "\n".join(segments)
def generate_text(
    prompt,
    max_length=200,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.1,
    system_prompt="You are a helpful AI assistant. Provide clear and concise answers."
):
    """Generate a response for *prompt* using the loaded pipeline.

    Args:
        prompt: the user's message; must be non-empty.
        max_length: maximum number of new tokens to generate.
        temperature: sampling temperature, forwarded to the pipeline call.
        top_p: nucleus-sampling threshold, forwarded to the pipeline call.
        repetition_penalty: penalty factor, forwarded to the pipeline call.
        system_prompt: instruction prepended via the chat template.

    Returns:
        str: markdown containing the response and the generation time, or a
        warning/error message.
    """
    global text_generator

    if text_generator is None:
        return "⚠️ Please load the model first using the 'Load Model' button."

    if not prompt.strip():
        return "⚠️ Please enter a prompt."

    try:
        formatted_prompt = format_prompt(prompt, system_prompt)

        # Sampling parameters are passed per call below; mutating attributes
        # on the pipeline object (as an earlier revision did) has no effect
        # on HF pipelines, so that dead code was removed.
        start_time = time.time()
        result = text_generator(
            formatted_prompt,
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
        generation_time = time.time() - start_time

        # The pipeline echoes the prompt; keep only the assistant's part.
        generated_text = result[0]["generated_text"]
        if "<|assistant|>" in generated_text:
            response = generated_text.split("<|assistant|>")[-1].strip()
        else:
            response = generated_text

        # Format output with timing metadata.
        output = f"**Response:**\n{response}\n\n---\n*Generated in {generation_time:.2f} seconds*"

        return output

    except Exception as e:
        return f"❌ Error during generation: {str(e)}"
def clear_chat():
    """Reset the prompt box and the output panel to empty strings."""
    return ("", "")
# Create custom theme
custom_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="md",
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    block_title_text_weight="600",
)

# Build the Gradio interface. The theme must be supplied to gr.Blocks();
# launch() has no `theme` kwarg, so the original's theme was never applied
# and launch() would raise a TypeError.
with gr.Blocks(theme=custom_theme) as demo:
    gr.Markdown(
        """
        # 🤖 Smol LLM Inference GUI

        **Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)** -
        Efficient text generation using TinyLlama

        This application runs a compact language model locally for text generation.
        Perfect for chat, completion tasks, and creative writing.
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            # Model loading section
            with gr.Group():
                gr.Markdown("### 📦 Model Management")
                model_status = gr.Textbox(
                    label="Model Status",
                    value="Model not loaded. Click 'Load Model' to start.",
                    interactive=False,
                )
                load_btn = gr.Button(
                    "🔄 Load Model",
                    variant="primary",
                    size="lg",
                )

            # Generation parameters
            gr.Markdown("### ⚙️ Generation Parameters")

            with gr.Row():
                max_length = gr.Slider(
                    minimum=50,
                    maximum=1024,
                    value=200,
                    step=50,
                    label="Max Tokens",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                )

            with gr.Row():
                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p",
                )
                repetition_penalty = gr.Slider(
                    minimum=1.0,
                    maximum=2.0,
                    value=1.1,
                    step=0.1,
                    label="Repetition Penalty",
                )

            system_prompt = gr.Textbox(
                label="System Prompt",
                value="You are a helpful AI assistant. Provide clear and concise answers.",
                lines=3,
                placeholder="Enter a system prompt to guide the model's behavior...",
            )

        with gr.Column(scale=3):
            # Main interface
            with gr.Group():
                gr.Markdown("### 💬 Text Generation")

                prompt_input = gr.Textbox(
                    label="Enter your prompt",
                    placeholder="Type your message here...",
                    lines=4,
                    autofocus=True,
                )

                with gr.Row():
                    generate_btn = gr.Button(
                        "🚀 Generate",
                        variant="primary",
                        size="lg",
                    )
                    clear_btn = gr.Button(
                        "🗑️ Clear",
                        variant="secondary",
                    )

                output_text = gr.Markdown(
                    label="Generated Response",
                    value="*Response will appear here...*",
                )

            # Example prompts
            with gr.Accordion("📝 Example Prompts", open=False):
                gr.Examples(
                    examples=[
                        ["Write a short story about a robot discovering music."],
                        ["Explain quantum computing in simple terms."],
                        ["Create a poem about the changing seasons."],
                        ["What are the benefits of renewable energy?"],
                        ["Write a Python function to calculate fibonacci numbers."],
                        ["Describe the perfect day in your own words."],
                        ["Explain the concept of machine learning to a beginner."],
                        ["Create a dialogue between two friends planning a trip."],
                    ],
                    inputs=[prompt_input],
                    label="Click an example to get started",
                )

    # Event handlers. Gradio event listeners have no `api_visibility` kwarg
    # (the original passed one, which raises a TypeError); endpoints are
    # public by default and `api_name=False` hides one from the API.
    load_btn.click(
        fn=load_model,
        outputs=[model_status],
    )

    generate_btn.click(
        fn=generate_text,
        inputs=[
            prompt_input,
            max_length,
            temperature,
            top_p,
            repetition_penalty,
            system_prompt,
        ],
        outputs=[output_text],
    )

    clear_btn.click(
        fn=clear_chat,
        # clear_chat returns two values; wire both targets (the original
        # listed only prompt_input, mismatching the return arity).
        outputs=[prompt_input, output_text],
        api_name=False,
    )

    # Allow Enter key to generate
    prompt_input.submit(
        fn=generate_text,
        inputs=[
            prompt_input,
            max_length,
            temperature,
            top_p,
            repetition_penalty,
            system_prompt,
        ],
        outputs=[output_text],
    )

# Launch the application. `footer_links` is not a launch() parameter and
# was dropped; the theme is applied on gr.Blocks above.
if __name__ == "__main__":
    demo.launch(
        share=False,
        show_error=True,
    )