TobDeBer committed on
Commit
0bd8d77
·
1 Parent(s): 0471f91
Files changed (2) hide show
  1. app.py +19 -27
  2. requirements.txt +1 -0
app.py CHANGED
@@ -4,8 +4,8 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
  import time
5
  import random
6
 
7
- # Model configuration - using TinyLlama for efficient CPU inference
8
- MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
9
 
10
  # Global variables for model components
11
  tokenizer = None
@@ -44,12 +44,12 @@ def load_model():
44
  return f"❌ Error loading model: {str(e)}"
45
 
46
  def format_prompt(prompt, system_prompt=None):
47
- """Format the prompt for chat-style models"""
 
48
  if system_prompt:
49
- formatted = f"<|system|>\n{system_prompt}\n<|user|>\n{prompt}\n<|assistant|>"
50
- else:
51
- formatted = f"<|user|>\n{prompt}\n<|assistant|>"
52
- return formatted
53
 
54
  def generate_text(
55
  prompt,
@@ -63,7 +63,7 @@ def generate_text(
63
  global text_generator
64
 
65
  if text_generator is None:
66
- return "⚠️ Please load the model first using the 'Load Model' button."
67
 
68
  if not prompt.strip():
69
  return "⚠️ Please enter a prompt."
@@ -72,12 +72,6 @@ def generate_text(
72
  # Format the prompt
73
  formatted_prompt = format_prompt(prompt, system_prompt)
74
 
75
- # Update pipeline parameters
76
- text_generator.max_new_tokens = max_length
77
- text_generator.temperature = temperature
78
- text_generator.top_p = top_p
79
- text_generator.repetition_penalty = repetition_penalty
80
-
81
  # Generate response
82
  start_time = time.time()
83
  result = text_generator(
@@ -88,19 +82,14 @@ def generate_text(
88
  repetition_penalty=repetition_penalty,
89
  do_sample=True,
90
  pad_token_id=tokenizer.eos_token_id,
91
- eos_token_id=tokenizer.eos_token_id
 
92
  )
93
 
94
  generation_time = time.time() - start_time
95
 
96
- # Extract the generated text
97
- generated_text = result[0]["generated_text"]
98
-
99
- # Extract only the assistant's response
100
- if "<|assistant|>" in generated_text:
101
- response = generated_text.split("<|assistant|>")[-1].strip()
102
- else:
103
- response = generated_text
104
 
105
  # Format output with metadata
106
  output = f"**Response:**\n{response}\n\n---\n*Generated in {generation_time:.2f} seconds*"
@@ -136,7 +125,7 @@ with gr.Blocks() as demo:
136
  # 🤖 Smol LLM Inference GUI
137
 
138
  **Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)** -
139
- Efficient text generation using TinyLlama
140
 
141
  This application runs a compact language model locally for text generation.
142
  Perfect for chat, completion tasks, and creative writing.
@@ -150,11 +139,11 @@ with gr.Blocks() as demo:
150
  gr.Markdown("### 📦 Model Management")
151
  model_status = gr.Textbox(
152
  label="Model Status",
153
- value="Model not loaded. Click 'Load Model' to start.",
154
  interactive=False
155
  )
156
  load_btn = gr.Button(
157
- "🔄 Load Model",
158
  variant="primary",
159
  size="lg"
160
  )
@@ -288,12 +277,15 @@ with gr.Blocks() as demo:
288
  api_visibility="public"
289
  )
290
 
 
 
 
291
  # Launch the application
292
  demo.launch(
293
  theme=custom_theme,
294
  footer_links=[
295
  {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
296
- {"label": "TinyLlama Model", "url": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0"},
297
  {"label": "Gradio", "url": "https://gradio.app"}
298
  ],
299
  share=False,
 
4
  import time
5
  import random
6
 
7
+ # Model configuration - using SmolLM2 for efficient inference
8
+ MODEL_NAME = "HuggingFaceTB/SmolLM2-135M-Instruct"
9
 
10
  # Global variables for model components
11
  tokenizer = None
 
44
  return f"❌ Error loading model: {str(e)}"
45
 
46
  def format_prompt(prompt, system_prompt=None):
47
+ """Format the prompt for chat-style models using tokenizer's template"""
48
+ messages = []
49
  if system_prompt:
50
+ messages.append({"role": "system", "content": system_prompt})
51
+ messages.append({"role": "user", "content": prompt})
52
+ return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
53
 
54
  def generate_text(
55
  prompt,
 
63
  global text_generator
64
 
65
  if text_generator is None:
66
+ return "⚠️ Please wait for the model to finish loading..."
67
 
68
  if not prompt.strip():
69
  return "⚠️ Please enter a prompt."
 
72
  # Format the prompt
73
  formatted_prompt = format_prompt(prompt, system_prompt)
74
 
 
 
 
 
 
 
75
  # Generate response
76
  start_time = time.time()
77
  result = text_generator(
 
82
  repetition_penalty=repetition_penalty,
83
  do_sample=True,
84
  pad_token_id=tokenizer.eos_token_id,
85
+ eos_token_id=tokenizer.eos_token_id,
86
+ return_full_text=False
87
  )
88
 
89
  generation_time = time.time() - start_time
90
 
91
+ # Extract the generated response directly
92
+ response = result[0]["generated_text"].strip()
 
 
 
 
 
 
93
 
94
  # Format output with metadata
95
  output = f"**Response:**\n{response}\n\n---\n*Generated in {generation_time:.2f} seconds*"
 
125
  # 🤖 Smol LLM Inference GUI
126
 
127
  **Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder)** -
128
+ Efficient text generation using SmolLM2-135M
129
 
130
  This application runs a compact language model locally for text generation.
131
  Perfect for chat, completion tasks, and creative writing.
 
139
  gr.Markdown("### 📦 Model Management")
140
  model_status = gr.Textbox(
141
  label="Model Status",
142
+ value="Model is loading automatically...",
143
  interactive=False
144
  )
145
  load_btn = gr.Button(
146
+ "🔄 Reload Model",
147
  variant="primary",
148
  size="lg"
149
  )
 
277
  api_visibility="public"
278
  )
279
 
280
+ # Auto-load the model at startup
281
+ load_model()
282
+
283
  # Launch the application
284
  demo.launch(
285
  theme=custom_theme,
286
  footer_links=[
287
  {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
288
+ {"label": "SmolLM2 Model", "url": "https://huggingface.co/HuggingFaceTB/SmolLM2-135M-Instruct"},
289
  {"label": "Gradio", "url": "https://gradio.app"}
290
  ],
291
  share=False,
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  torch
2
  transformers
 
 
1
  torch
2
  transformers
3
+ gradio