import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import time


# Model configuration
MODEL_ID = "saish-shetty/SmolLM2-1.7B-pre-trained"
DEVICE = "cpu"
MAX_LENGTH = 512  # maximum prompt length in tokens


def load_model():
    """Load the model and tokenizer from the Hugging Face Hub."""
    print(f"Loading model: {MODEL_ID}")

    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float32,  # full precision for CPU inference
            device_map=None,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )

        model = model.to(DEVICE)
        model.eval()

        # Some causal LMs ship without a pad token; fall back to EOS.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        print("Model loaded successfully!")
        return model, tokenizer

    except Exception as e:
        print(f"Error loading model: {e}")
        raise


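# Load the model once at import time so every Gradio request reuses the same
# weights instead of reloading them per call.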
model, tokenizer = load_model()


def generate_text(
    prompt,
    max_new_tokens=100,
    temperature=0.7,
    top_p=0.9,
    top_k=50,
    repetition_penalty=1.1,
):
    """Generate text from the prompt with the given sampling parameters."""

    if not prompt.strip():
        return "Please enter a prompt to generate text."

    try:
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=MAX_LENGTH)

        start_time = time.time()

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                repetition_penalty=repetition_penalty,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            )

        generation_time = time.time() - start_time

        # Decode only the newly generated tokens. Slicing the token ids is more
        # robust than slicing the decoded string, since decoding the prompt does
        # not always round-trip to the exact input text.
        prompt_length = inputs["input_ids"].shape[1]
        new_tokens = outputs[0][prompt_length:]
        new_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

        tokens_generated = len(new_tokens)
        tokens_per_second = tokens_generated / generation_time if generation_time > 0 else 0

        result = f"{new_text}\n\n"
        result += "📊 **Generation Stats:**\n"
        result += f"• Tokens generated: {tokens_generated}\n"
        result += f"• Time taken: {generation_time:.2f}s\n"
        result += f"• Speed: {tokens_per_second:.1f} tokens/sec"

        return result

    except Exception as e:
        return f"Error generating text: {e}"


def get_model_info():
    """Return model information as markdown."""
    info = """
# 🤖 SmolLM2-1.7B Pre-trained from Scratch on Cosmopedia-v2

- **Base Model:** HuggingFaceTB/SmolLM2-1.7B
- **Training Dataset:** Cosmopedia-v2 (1B-token subset)
- **Training Steps:** 30,000
- **Final Loss:** 3.7547
- **Parameters:** 1.7B

**Training Configuration:**
- Batch Size: 1 per device
- Gradient Accumulation: 16 steps
- Learning Rate: 2e-5
- Sequence Length: 2048 tokens
- Optimizer: 8-bit AdamW
- Mixed Precision: bf16

This model was pre-trained from scratch on educational content from Cosmopedia-v2,
making it particularly good at explaining concepts, answering questions,
and generating educational content.
"""
    return info


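# Example prompts surfaced as one-click buttons in the UI.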
example_prompts = [
    "What is the meaning of life and how do different philosophical traditions approach this question?",
    "Explain the concept of free will versus determinism in philosophy.",
    "What are the main arguments for and against the existence of objective moral truths?",
    "How do different cultures define happiness and what can we learn from these perspectives?",
    "What is consciousness and why is it considered one of philosophy's hardest problems?",
    "Discuss the relationship between knowledge and belief in epistemology.",
    "What are the ethical implications of artificial intelligence in modern society?",
    "How do ancient philosophical teachings remain relevant in contemporary life?",
]


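# Build the Gradio Blocks UI: prompt and sampling controls on the left,
# generated output on the right, with model details in an accordion below.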
def create_interface():
    with gr.Blocks(
        title="SmolLM2-1.7B Cosmopedia Demo",
        theme=gr.themes.Soft(),
        css="""
        .model-info {
            background-color: transparent !important;
            color: var(--body-text-color) !important;
            padding: 20px;
            border-radius: 10px;
            margin: 10px 0;
        }
        """,
    ) as demo:

        gr.Markdown("# 🚀 SmolLM2-1.7B Cosmopedia Chat")
        gr.Markdown("*Pre-trained from scratch on educational content for thoughtful conversations*")

        with gr.Row():
            with gr.Column(scale=2):
                prompt_input = gr.Textbox(
                    label="📝 Your Prompt",
                    placeholder="Ask a philosophical question or request an explanation...",
                    lines=3,
                    value="",
                )

                with gr.Accordion("🔧 Generation Parameters", open=False):
                    max_tokens = gr.Slider(
                        minimum=10,
                        maximum=300,
                        value=100,
                        step=10,
                        label="Max New Tokens",
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=0.7,
                        step=0.1,
                        label="Temperature (creativity)",
                    )
                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.9,
                        step=0.05,
                        label="Top-p (nucleus sampling)",
                    )
                    top_k = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=50,
                        step=1,
                        label="Top-k",
                    )
                    repetition_penalty = gr.Slider(
                        minimum=1.0,
                        maximum=2.0,
                        value=1.1,
                        step=0.05,
                        label="Repetition Penalty",
                    )

                generate_btn = gr.Button("🚀 Generate Text", variant="primary", size="lg")

                gr.Markdown("### 💡 Example Prompts:")
                example_buttons = []
                for example in example_prompts:
                    label = f"📖 {example[:60]}..." if len(example) > 60 else f"📖 {example}"
                    btn = gr.Button(label, size="sm")
                    example_buttons.append((btn, example))

            with gr.Column(scale=2):
                output_text = gr.Textbox(
                    label="🤖 Generated Response",
                    lines=15,
                    max_lines=20,
                    show_copy_button=True,
                )

                clear_btn = gr.Button("🧹 Clear", variant="secondary")

        with gr.Accordion("ℹ️ Model Information", open=False):
            gr.Markdown(get_model_info(), elem_classes="model-info")

        # Wire up the event handlers.
        generate_btn.click(
            fn=generate_text,
            inputs=[prompt_input, max_tokens, temperature, top_p, top_k, repetition_penalty],
            outputs=output_text,
        )

        # Bind each example as a default argument so every button fills in its own prompt.
        for btn, example in example_buttons:
            btn.click(
                lambda example=example: example,
                outputs=prompt_input,
            )

        clear_btn.click(
            lambda: ("", ""),
            outputs=[prompt_input, output_text],
        )

        gr.Markdown("""
        ---
        **Note:** This model runs on CPU for demo purposes. For faster inference, use GPU deployment.

        🔗 **Model:** [saish-shetty/SmolLM2-1.7B-pre-trained](https://huggingface.co/saish-shetty/SmolLM2-1.7B-pre-trained)
        """)

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
    )
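# To run locally (a sketch, assuming this file is saved as app.py; pin package
# versions as needed):
#   pip install gradio torch transformers accelerate
#   python app.py
# The app then serves on http://localhost:7860. Note that low_cpu_mem_usage=True
# above requires the `accelerate` package to be installed.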