Spaces:

Scaryscar
/

Math-charting-model

Sleeping

App Files Community

Scaryscar committed on Jul 26, 2025

Commit

c99d5db

verified ·

1 Parent(s): 10c007d

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -11

app.py CHANGED Viewed

@@ -1,39 +1,75 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from huggingface_hub import login
 import torch
 import gradio as gr
 import os
-# Authenticate using HF_TOKEN from Space secrets
 login(token=os.environ.get("HF_TOKEN"))
 # Configuration
 MODEL_NAME = "google/gemma-2b-it"
 CACHE_DIR = "/tmp"
-# Load model with authentication
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     device_map="auto",
     torch_dtype=torch.float16,
     cache_dir=CACHE_DIR
 )
 def solve_math(question):
-    prompt = f"Question: {question}\nAnswer:"
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(**inputs, max_new_tokens=200)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 # Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("## Gemma-2B Math Solver")
     with gr.Row():
-        question = gr.Textbox(label="Math Problem", placeholder="Enter your question here...")
     with gr.Row():
-        answer = gr.Textbox(label="Solution", interactive=False)
-    question.submit(fn=solve_math, inputs=question, outputs=answer)
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from huggingface_hub import login
 import torch
 import gradio as gr
 import os
+# Authenticate with Hugging Face
 login(token=os.environ.get("HF_TOKEN"))
 # Configuration
 MODEL_NAME = "google/gemma-2b-it"
 CACHE_DIR = "/tmp"
+# 4-bit quantization config
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_quant_type="nf4"
+)
+# Load model with optimizations
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=CACHE_DIR)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
+    quantization_config=quant_config,
     device_map="auto",
     torch_dtype=torch.float16,
     cache_dir=CACHE_DIR
 )
 def solve_math(question):
+    prompt = f"""Solve this math problem step by step:
+Question: {question}
+Answer:"""
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=256,
+        temperature=0.7,
+        do_sample=True
+    )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 # Gradio Interface
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""<h1><center>🧮 Gemma-2B Math Solver</center></h1>""")
+    with gr.Row():
+        question = gr.Textbox(
+            label="Enter your math problem",
+            placeholder="e.g., What is the derivative of x^2?",
+            lines=3
+        )
     with gr.Row():
+        submit_btn = gr.Button("Solve", variant="primary")
     with gr.Row():
+        answer = gr.Textbox(
+            label="Solution",
+            lines=5,
+            interactive=False
+        )
+    submit_btn.click(
+        fn=solve_math,
+        inputs=question,
+        outputs=answer
+    )
 if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )