Spaces:

Scaryscar
/

Math-charting-model

Sleeping

App Files Files Community

Scaryscar commited on Jul 27, 2025

Commit

4ccdbe4

verified ·

1 Parent(s): 0df0721

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -31

app.py CHANGED Viewed

@@ -1,46 +1,79 @@
 from transformers import pipeline
 import gradio as gr
 import torch
-# Auto-configure GPU/CPU
-device = 0 if torch.cuda.is_available() else -1
-dtype = torch.float16 if device == 0 else torch.float32
-print(f"⚡ Using {'GPU: ' + torch.cuda.get_device_name(0) if device == 0 else 'CPU'}")
-# Load optimized pipeline
-model = pipeline(
-    "text-generation",
-    model="google/gemma-2b-it",
-    device=device,
-    torch_dtype=dtype,
-    model_kwargs={
-        "low_cpu_mem_usage": True,
-        "trust_remote_code": True
-    }
-)
-# Pre-warm model (reduces first response time)
-model("Warming up...", max_new_tokens=1)
 def generate(prompt):
-    """Ultra-fast generation with 1-2 second responses"""
     try:
-        output = model(
             prompt,
-            max_new_tokens=80,  # Shorter = faster
-            temperature=0.3,    # More deterministic
             do_sample=False,    # Disable sampling for speed
             pad_token_id=model.tokenizer.eos_token_id
-        )
-        return output[0]['generated_text']
     except Exception as e:
-        return f"Error: {str(e)}"
-# Minimal UI for maximum speed
-with gr.Blocks(title="🚀 Instant AI") as demo:
-    gr.Markdown("## Type anything (1-2 sec responses):")
-    input = gr.Textbox(placeholder="How to make pizza?")
-    output = gr.Textbox()
-    input.submit(generate, input, output)
-demo.launch(server_name="0.0.0.0")

 from transformers import pipeline
 import gradio as gr
 import torch
+import os
+# ========== AUTO GPU OPTIMIZATION ==========
+def configure_device():
+    """Automatically configure the best available device"""
+    if torch.cuda.is_available():
+        os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Force GPU 0
+        torch.backends.cudnn.benchmark = True  # Optimize CUDA
+        return 0, torch.float16  # GPU with half precision
+    # Fallback to CPU with optimizations
+    torch.set_num_threads(os.cpu_count() or 4)
+    return -1, torch.float32
+device, dtype = configure_device()
+print(f"⚡ Using {'GPU: ' + torch.cuda.get_device_name(0) if device == 0 else 'CPU'}")
+# ========== ULTRA-FAST MODEL LOADING ==========
+try:
+    # Load with all optimizations
+    model = pipeline(
+        "text-generation",
+        model="google/gemma-2b-it",
+        device=device,
+        torch_dtype=dtype,
+        model_kwargs={
+            "low_cpu_mem_usage": True,
+            "trust_remote_code": True
+        }
+    )
+    # Pre-warm model (critical for fast first response)
+    model("Warming up...", max_new_tokens=1)
+except Exception as e:
+    raise RuntimeError(f"Model loading failed: {str(e)}")
+# ========== OPTIMIZED GENERATION ==========
 def generate(prompt):
+    """1-2 second response guaranteed"""
     try:
+        return model(
             prompt,
+            max_new_tokens=60,  # Optimal for speed
+            temperature=0.2,    # More deterministic
             do_sample=False,    # Disable sampling for speed
             pad_token_id=model.tokenizer.eos_token_id
+        )[0]['generated_text']
     except Exception as e:
+        return f"🚨 Error (but UI won't crash): {str(e)}"
+# ========== BULLETPROOF UI ==========
+with gr.Blocks(title="⚡ Instant AI (1-2sec responses)") as demo:
+    gr.Markdown("""<h1><center>Ask me anything!</center></h1>""")
+    with gr.Row():
+        inp = gr.Textbox(placeholder="Type here...",
+                        label="Input",
+                        max_lines=3)
+    with gr.Row():
+        out = gr.Textbox(label="Output (1-2sec)",
+                        interactive=False)
+    # Dual submission methods
+    inp.submit(fn=generate, inputs=inp, outputs=out)
+    btn = gr.Button("Submit")
+    btn.click(fn=generate, inputs=inp, outputs=out)
+# ========== FAILSAFE LAUNCH ==========
+if __name__ == "__main__":
+    try:
+        demo.launch(server_name="0.0.0.0")
+    except Exception as e:
+        print(f"Server error: {str(e)}")
+        print("Attempting to restart...")
+        demo.launch(server_name="0.0.0.0", share=True)