Update app.py
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
 import torch
 import os
 
-# =====
+# ===== SMART DEVICE CONFIGURATION =====
 def get_best_device():
     if torch.cuda.is_available():
         torch.backends.cudnn.benchmark = True
@@ -11,47 +11,53 @@ def get_best_device():
     return -1, torch.float32  # CPU fallback
 
 device, dtype = get_best_device()
-device_name = torch.cuda.get_device_name(0) if device == 0 else "CPU"
+device_name = "GPU: " + torch.cuda.get_device_name(0) if device == 0 else "CPU"
 print(f"⚡ Running on: {device_name}")
 
-# =====
+# ===== ERROR-PROOF MODEL LOADING =====
 try:
+    # Correct pipeline configuration (fixed trust_remote_code)
     model = pipeline(
-
+        "text-generation",
         model="google/gemma-2b-it",
         device=device,
         torch_dtype=dtype,
         model_kwargs={
             "low_cpu_mem_usage": True,
-            "trust_remote_code": True  #
+            "trust_remote_code": True  # Correct placement
         }
     )
+
     # Pre-warm model
     model("Warmup", max_new_tokens=1)
+
 except Exception as e:
-
+    # Simplified fallback (removes duplicate trust_remote_code)
+    model = pipeline(
+        "text-generation",
+        model="google/gemma-2b-it",
+        device=device,
+        torch_dtype=dtype
+    )
 
-# =====
+# ===== OPTIMIZED GENERATION =====
 def generate(prompt):
     try:
         return model(
             prompt,
-            max_new_tokens=60,
+            max_new_tokens=60,
             temperature=0.2,
             do_sample=False,
             pad_token_id=model.tokenizer.eos_token_id
         )[0]['generated_text']
     except Exception as e:
-        return f"⚠️ Error
+        return f"⚠️ Error: {str(e)}"
 
-# =====
-with gr.Blocks(
-    gr.Markdown("## Ask anything
-
-
-
-    out = gr.Textbox(label="Instant Answer", interactive=False)
-    inp.submit(generate, inp, out)
+# ===== SIMPLE INTERFACE =====
+with gr.Blocks() as demo:
+    gr.Markdown("## Ask anything (1-2 second responses)")
+    input = gr.Textbox(label="Your question")
+    output = gr.Textbox(label="Answer")
+    input.submit(generate, input, output)
 
-
-demo.launch(server_name="0.0.0.0")
+demo.launch(server_name="0.0.0.0")
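For readability, here is a best-effort reconstruction of app.py as it should read after this change. The diff never shows lines 1-2 or line 10, so the two imports and the GPU return value (0, torch.float16) below are assumptions inferred from the hunk headers and the surrounding code, not part of the commit; everything else is taken verbatim from the + side above.

# app.py after this commit -- a hedged reconstruction, not the authoritative file.
from transformers import pipeline  # assumed import: pipeline() is used below
import gradio as gr                # assumed position: appears in the hunk header context
import torch
import os

# ===== SMART DEVICE CONFIGURATION =====
def get_best_device():
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        return 0, torch.float16  # assumption: this line (10) is elided by the diff
    return -1, torch.float32  # CPU fallback

device, dtype = get_best_device()
device_name = "GPU: " + torch.cuda.get_device_name(0) if device == 0 else "CPU"
print(f"⚡ Running on: {device_name}")

# ===== ERROR-PROOF MODEL LOADING =====
try:
    # Correct pipeline configuration (fixed trust_remote_code)
    model = pipeline(
        "text-generation",
        model="google/gemma-2b-it",
        device=device,
        torch_dtype=dtype,
        model_kwargs={
            "low_cpu_mem_usage": True,
            "trust_remote_code": True  # Correct placement
        }
    )
    # Pre-warm model
    model("Warmup", max_new_tokens=1)
except Exception as e:
    # Simplified fallback (removes duplicate trust_remote_code)
    model = pipeline(
        "text-generation",
        model="google/gemma-2b-it",
        device=device,
        torch_dtype=dtype
    )

# ===== OPTIMIZED GENERATION =====
def generate(prompt):
    try:
        return model(
            prompt,
            max_new_tokens=60,
            temperature=0.2,
            do_sample=False,
            pad_token_id=model.tokenizer.eos_token_id
        )[0]['generated_text']
    except Exception as e:
        return f"⚠️ Error: {str(e)}"

# ===== SIMPLE INTERFACE =====
with gr.Blocks() as demo:
    gr.Markdown("## Ask anything (1-2 second responses)")
    input = gr.Textbox(label="Your question")   # note: shadows the input() builtin; kept as committed
    output = gr.Textbox(label="Answer")         # note: shadows a common name; kept as committed
    input.submit(generate, input, output)

demo.launch(server_name="0.0.0.0")

One behavior worth knowing: with do_sample=False the pipeline decodes greedily, so the temperature=0.2 setting has no effect (recent transformers versions warn about this). That is how the change was committed and is reproduced unchanged here.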
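Once the Space is up, a quick way to exercise it outside the browser is Gradio's Python client. A minimal sketch, assuming the app is reachable on Gradio's default local port and that Gradio exposes the submit handler under the endpoint name "/generate" (derived from the function name); both are assumptions, so confirm the real endpoint with client.view_api().

from gradio_client import Client

# Assumption: local run on the default Gradio port.
client = Client("http://localhost:7860")

# Assumption: the submit handler is exposed as /generate;
# call client.view_api() to see the actual endpoint names.
result = client.predict("Explain what get_best_device() does.", api_name="/generate")
print(result)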