Scaryscar committed on
Commit
b2f98f0
·
verified ·
1 Parent(s): 5370515

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -84
app.py CHANGED
@@ -1,100 +1,47 @@
1
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
2
  from huggingface_hub import login
3
- import torch
4
  import gradio as gr
5
  import os
6
 
7
- # Configuration
8
- MODEL_NAME = "google/gemma-2b-it"
9
- CACHE_DIR = "/tmp"
10
- MAX_TOKENS = 200 # Reduced for faster responses
11
-
12
- # Authenticate (HF_TOKEN must be set in Space secrets)
13
  login(token=os.environ.get("HF_TOKEN"))
14
 
15
- # 4-bit quantization for memory efficiency
16
- quant_config = BitsAndBytesConfig(
17
- load_in_4bit=True,
18
- bnb_4bit_compute_dtype=torch.float16,
19
- bnb_4bit_quant_type="nf4"
 
 
20
  )
21
 
22
- # Load model with error handling
23
- try:
24
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
25
- model = AutoModelForCausalLM.from_pretrained(
26
- MODEL_NAME,
27
- quantization_config=quant_config,
28
- device_map="auto",
29
- torch_dtype=torch.float16,
30
- cache_dir=CACHE_DIR
31
- )
32
- except Exception as e:
33
- raise gr.Error(f"⚠️ Model loading failed. Please check your token and try again.\nError: {str(e)}")
34
-
35
  def solve_math(question):
36
- """Generate step-by-step solutions with error handling"""
 
 
 
 
 
37
  try:
38
- prompt = f"Solve this step by step:\n\nQuestion: {question}\nAnswer:"
39
- inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
40
-
41
- outputs = model.generate(
42
- **inputs,
43
- max_new_tokens=MAX_TOKENS,
44
- temperature=0.3, # Lower = more deterministic answers
45
- do_sample=True,
46
- pad_token_id=tokenizer.eos_token_id
47
  )
48
-
49
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
50
- return answer.split("Answer:")[-1].strip()
51
-
52
  except Exception as e:
53
- return f" Error generating answer: {str(e)}"
54
 
55
- # Preload model for faster first response
56
  solve_math("2+2=") # Warm-up call
57
 
58
- # Gradio Interface
59
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
60
- gr.Markdown("""<h1><center>⚡ Gemma-2B Math Solver</center></h1>""")
61
-
62
- with gr.Row():
63
- question = gr.Textbox(
64
- label="Enter your math problem",
65
- placeholder="What is the integral of x^2 from 0 to 3?",
66
- lines=3
67
- )
68
-
69
- with gr.Row():
70
- submit_btn = gr.Button("Solve", variant="primary")
71
-
72
- with gr.Row():
73
- answer = gr.Textbox(
74
- label="Step-by-step solution",
75
- lines=6,
76
- interactive=False
77
- )
78
-
79
- # Examples for quick testing
80
- gr.Examples(
81
- examples=[
82
- ["What is 2^10 + 5*3?"],
83
- ["Solve for x: 3x + 5 = 20"],
84
- ["Calculate the area of a circle with radius 4"]
85
- ],
86
- inputs=question
87
- )
88
-
89
- submit_btn.click(
90
- fn=solve_math,
91
- inputs=question,
92
- outputs=answer,
93
- api_name="solve"
94
- )
95
 
96
- if __name__ == "__main__":
97
- demo.launch(
98
- server_name="0.0.0.0",
99
- server_port=7860
100
- )
 
1
from transformers import pipeline
from huggingface_hub import login
import gradio as gr
import os

# Authenticate only when a token is configured (set HF_TOKEN in Space secrets).
# Calling login(token=None) falls back to an interactive prompt, which would
# hang a headless Space at startup.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)

# Load a text-generation pipeline instead of a manual model/tokenizer pair
# (single call, lazy device placement via device_map="auto").
# NOTE(review): passing load_in_4bit through model_kwargs is deprecated in
# recent transformers releases in favour of
# quantization_config=BitsAndBytesConfig(load_in_4bit=True); kept as-is since
# the file pins no transformers version — TODO: migrate when upgrading.
math_pipeline = pipeline(
    "text-generation",
    model="google/gemma-2b-it",
    device_map="auto",
    torch_dtype="auto",
    model_kwargs={"load_in_4bit": True},
)
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
def solve_math(question):
    """Generate a concise answer to *question* via the text-generation pipeline.

    Parameters
    ----------
    question : str
        The math problem, interpolated verbatim into the prompt.

    Returns
    -------
    str
        The text following the final "Answer:" marker in the generated
        output, or a "🚨 Error: ..." message if generation fails.
    """
    # Prompt is built outside the try block: only the pipeline call can
    # reasonably raise here, and we want a minimal try body.
    prompt = f"""Solve this math problem concisely:

Question: {question}
Answer:"""

    try:
        # do_sample=False means greedy decoding; a temperature value would be
        # ignored (and triggers a warning in recent transformers), so none is
        # passed. num_return_sequences=1 keeps the output a single candidate.
        result = math_pipeline(
            prompt,
            max_new_tokens=150,  # Shorter = faster responses
            do_sample=False,     # Deterministic, fastest generation
            num_return_sequences=1,
        )
        # The pipeline echoes the prompt; keep only the text after "Answer:".
        return result[0]['generated_text'].split("Answer:")[-1].strip()
    except Exception as e:
        # Surface the failure to the UI instead of crashing the app.
        return f"🚨 Error: {str(e)}"
36
 
37
# Preload: the first pipeline call pays the model-loading cost, so trigger it
# once at startup rather than on the first user request.
solve_math("2+2=")  # Warm-up call

# Minimal UI for fastest response
with gr.Blocks(title="⚡ Instant Math Solver") as demo:
    gr.Markdown("### Enter a math problem:")
    question = gr.Textbox(lines=2)
    # interactive=False marks the answer box as output-only.
    answer = gr.Textbox(label="Answer", lines=3, interactive=False)
    # Pressing Enter in the question box runs the solver.
    question.submit(solve_math, question, answer)

# Guard the launch so importing this module (e.g. from tests or another app)
# does not start a server; Spaces executes the file as __main__, so behavior
# there is unchanged. server_name="0.0.0.0" binds all interfaces, which the
# Space container requires.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")