Spaces:

Scaryscar
/

Maths-chartingmodel

Sleeping

App Files Files Community

Scaryscar committed on Jul 27, 2025

Commit

7ed9e69

verified ·

1 Parent(s): 04aa32f

Create app.py

Browse files

Files changed (1) hide show

app.py +81 -0

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+from transformers import pipeline
+import gradio as gr
+import torch
+import os
+# ===== AUTO-DEVICE CONFIGURATION =====
+def configure_device():
+    """Pick the inference device and the matching tensor precision.
+
+    Returns:
+        A ``(device, dtype)`` tuple: ``(0, torch.float16)`` when
+        ``torch.cuda.is_available()`` is true, otherwise
+        ``(-1, torch.float32)``. This script passes the integer as the
+        ``device`` argument of ``pipeline`` and the dtype as ``torch_dtype``.
+
+    Side effects:
+        CUDA path: sets ``CUDA_VISIBLE_DEVICES`` to ``"0"`` only when the
+        variable is not already set, and turns on
+        ``torch.backends.cudnn.benchmark``.
+        CPU path: caps the torch thread count at ``min(4, os.cpu_count())``
+        (1 if the CPU count is unknown).
+    """
+    if torch.cuda.is_available():
+        # Keep a device mask chosen by the host or launcher; overwriting it
+        # here could redirect the process to a GPU it was not assigned.
+        os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
+        torch.backends.cudnn.benchmark = True
+        return 0, torch.float16
+    # No CUDA: run on CPU in full precision with a bounded thread count.
+    torch.set_num_threads(min(4, os.cpu_count() or 1))
+    return -1, torch.float32
+# Resolve device/precision once at import time and report the choice.
+device, dtype = configure_device()
+if device == 0:
+    # Only query the CUDA device name when the GPU path was selected.
+    device_name = "GPU: " + torch.cuda.get_device_name(0)
+else:
+    device_name = "CPU"
+print(f"⚡ Running on: {device_name} | Precision: {dtype}")
+# ===== BULLETPROOF MODEL LOADING =====
+try:
+    # First attempt: load on the device and precision selected above.
+    model = pipeline(
+        task="text-generation",
+        model="google/gemma-2b-it",
+        device=device,
+        torch_dtype=dtype,
+        # ``trust_remote_code`` is deliberately not passed: this checkpoint
+        # does not need repository-supplied code, and placing the flag in
+        # ``model_kwargs`` can collide with the ``trust_remote_code`` keyword
+        # that ``pipeline`` forwards itself, failing this attempt outright.
+        model_kwargs={"low_cpu_mem_usage": True},
+    )
+    # Run a single token through the model so one-time initialization cost
+    # is paid here rather than on the first user request.
+    model("Warming up...", max_new_tokens=1)
+except Exception as e:
+    # Best-effort retry on CPU in float32 without the extra model options.
+    # Report what actually failed: the first attempt may already have been
+    # running on CPU, in which case blaming the GPU would be misleading.
+    if device == 0:
+        print(f"⚠️ GPU failed, falling back to CPU: {str(e)}")
+    else:
+        print(f"⚠️ Model load failed, retrying with default options: {str(e)}")
+    device, dtype = -1, torch.float32
+    model = pipeline(
+        task="text-generation",
+        model="google/gemma-2b-it",
+        device=device,
+        torch_dtype=dtype
+    )
+# ===== ULTRA-FAST GENERATION =====
+def generate(prompt):
+    """Return the model's greedy completion for ``prompt``.
+
+    Args:
+        prompt: Text entered by the user. ``None``, empty, or
+            whitespace-only input yields an empty string without calling
+            the model.
+
+    Returns:
+        Up to 50 newly generated tokens as a string (the prompt itself is
+        not echoed back). If generation raises, the exception is rendered
+        into an error string instead of propagating, so the UI callback
+        never fails. Latency depends on hardware and is not guaranteed.
+    """
+    if not prompt or not prompt.strip():
+        return ""
+    try:
+        outputs = model(
+            prompt,
+            max_new_tokens=50,
+            # Greedy decoding; no ``temperature`` is passed because it has
+            # no effect (and only triggers a warning) when sampling is off.
+            do_sample=False,
+            # Show only the continuation in the answer box, not the prompt.
+            return_full_text=False,
+            pad_token_id=model.tokenizer.eos_token_id,
+        )
+        return outputs[0]['generated_text']
+    except Exception as e:
+        return f"🔴 Error (but UI keeps working): {str(e)}"
+# ===== LIGHTNING-FAST INTERFACE =====
+with gr.Blocks(title="⚡ Instant AI (1-2s responses)") as demo:
+    # Heading displayed above the prompt box.
+    gr.Markdown("## Type anything for instant answers:")
+    # Prompt entry, on its own row.
+    with gr.Row():
+        inp = gr.Textbox(
+            placeholder="How does photosynthesis work?",
+            lines=2,
+            max_lines=3,
+        )
+    # Answer display, on its own row.
+    with gr.Row():
+        out = gr.Textbox(
+            label="Answer appears here (1-2 seconds)",
+            lines=5,
+        )
+    # Submitting the prompt box calls generate() with its text and writes
+    # the returned string into the answer box.
+    inp.submit(fn=generate, inputs=inp, outputs=out)
+# ===== FAILSAFE LAUNCH =====
+if __name__ == "__main__":
+    # Start the app only when run as a script: bind to every interface on
+    # port 7860 with error display enabled.
+    demo.launch(show_error=True, server_name="0.0.0.0", server_port=7860)