Scaryscar committed on
Commit
abd6fbd
·
verified ·
1 Parent(s): b7d954c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -25
app.py CHANGED
@@ -3,45 +3,44 @@ from huggingface_hub import login
3
  import gradio as gr
4
  import os
5
 
6
# Authenticate against the Hub (HF_TOKEN is set in the Space secrets).
login(token=os.getenv("HF_TOKEN"))

# Lightweight 4-bit generation pipeline (faster than a full-precision load).
# NOTE(review): "load_in_4bit" via model_kwargs needs bitsandbytes installed;
# newer transformers versions prefer quantization_config=BitsAndBytesConfig —
# confirm against the pinned transformers release.
math_pipeline = pipeline(
    "text-generation",
    model="google/gemma-2b-it",
    device_map="auto",
    torch_dtype="auto",
    model_kwargs={"load_in_4bit": True},
)
18
def solve_math(question):
    """Super-fast response with optimized prompt."""
    # Same prompt text as before, assembled from explicit line parts.
    prompt = (
        "Solve this math problem concisely:\n"
        "\n"
        f"Question: {question}\n"
        "Answer:"
    )
    try:
        generation = math_pipeline(
            prompt,
            max_new_tokens=150,       # shorter output = faster
            temperature=0.1,          # near-deterministic
            do_sample=False,          # greedy decoding is quicker
            num_return_sequences=1,
        )
        # The pipeline echoes the prompt; keep only what follows "Answer:".
        full_text = generation[0]['generated_text']
        return full_text.split("Answer:")[-1].strip()
    except Exception as e:
        # Shown in the UI instead of crashing the app.
        return f"🚨 Error: {str(e)}"
36
-
37
# Warm the pipeline up once at startup so the first user request is fast.
solve_math("2+2=")

# Minimal UI for fastest response.
with gr.Blocks(title="⚡ Instant Math Solver") as demo:
    gr.Markdown("### Enter a math problem:")
    problem_input = gr.Textbox(lines=2)
    answer_output = gr.Textbox(label="Answer", lines=3)
    problem_input.submit(solve_math, problem_input, answer_output)

demo.launch(server_name="0.0.0.0")
 
3
  import gradio as gr
4
  import os
5
 
6
# Fail fast if the bitsandbytes dependency is missing from the Space.
try:
    import bitsandbytes  # noqa
except ImportError:
    raise ImportError("bitsandbytes not installed! Add it to requirements.txt")

# Log in to the Hugging Face Hub (HF_TOKEN comes from the Space secrets).
login(token=os.getenv("HF_TOKEN"))

# Text-generation pipeline for Gemma; 4-bit loading dropped for compatibility.
_PIPELINE_KWARGS = {
    "model": "google/gemma-2b-it",
    "device_map": "auto",                          # place weights automatically
    "torch_dtype": "auto",                         # let transformers pick the dtype
    "model_kwargs": {"low_cpu_mem_usage": True},   # tame load-time memory spikes
}
math_pipeline = pipeline("text-generation", **_PIPELINE_KWARGS)
 
26
def solve_math(question):
    """Answer a math problem with the Gemma text-generation pipeline.

    Args:
        question: The math problem typed by the user (may be None/empty).

    Returns:
        The model's answer (the text after the final "A:" marker), or an
        "Error: ..." string — the UI shows errors instead of raising.
    """
    # Guard against an empty textbox: avoid a pointless model call.
    question = (question or "").strip()
    if not question:
        return "Error: empty question"
    prompt = f"Solve concisely:\nQ: {question}\nA:"
    try:
        result = math_pipeline(
            prompt,
            max_new_tokens=100,  # short answers keep latency low
            # Greedy decoding; `temperature` is ignored (with a warning)
            # when do_sample=False, so it is intentionally not passed.
            do_sample=False,
        )
        # generated_text echoes the prompt; keep only what follows "A:".
        return result[0]['generated_text'].split("A:")[-1].strip()
    except Exception as e:
        # Surface failures in the answer box rather than crashing the UI.
        return f"Error: {str(e)}"
 
 
 
38
 
39
# Bare-bones Gradio front end: one input box wired to solve_math on submit.
with gr.Blocks() as demo:
    gr.Markdown("## Math Solver")
    problem_box = gr.Textbox(label="Problem")
    solution_box = gr.Textbox(label="Solution")
    problem_box.submit(solve_math, problem_box, solution_box)

demo.launch()