Scaryscar committed on
Commit
7ed9e69
·
verified ·
1 Parent(s): 04aa32f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -0
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ import gradio as gr
3
+ import torch
4
+ import os
5
+
6
# ===== AUTO-DEVICE CONFIGURATION =====
def configure_device():
    """Smart device selection with performance optimizations.

    Returns:
        tuple: ``(device_index, dtype)`` — ``(0, torch.float16)`` when CUDA
        is available (half precision on the first GPU), otherwise
        ``(-1, torch.float32)`` for a thread-capped CPU run.
    """
    if not torch.cuda.is_available():
        # Optimized CPU configuration: cap threads to avoid oversubscription
        torch.set_num_threads(min(4, os.cpu_count() or 1))
        return -1, torch.float32

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    torch.backends.cudnn.benchmark = True  # Auto-tunes CUDA kernels
    return 0, torch.float16  # GPU with half-precision
17
+
18
# Resolve the runtime device/precision once at import time.
device, dtype = configure_device()
if device == 0:
    device_name = "GPU: " + torch.cuda.get_device_name(0)
else:
    device_name = "CPU"
print(f"⚡ Running on: {device_name} | Precision: {dtype}")
21
+
22
# ===== BULLETPROOF MODEL LOADING =====
def _build_pipeline(target_device, target_dtype, **extra_kwargs):
    """Construct the text-generation pipeline on the requested device."""
    return pipeline(
        task="text-generation",
        model="google/gemma-2b-it",  # Fast 2B parameter model
        device=target_device,
        torch_dtype=target_dtype,
        **extra_kwargs,
    )


try:
    model = _build_pipeline(
        device,
        dtype,
        model_kwargs={
            "low_cpu_mem_usage": True,
            "trust_remote_code": True,
        },
    )
    # Pre-warm model (critical for fast first response)
    model("Warming up...", max_new_tokens=1)
except Exception as e:
    # Fallback to CPU if GPU fails
    print(f"⚠️ GPU failed, falling back to CPU: {str(e)}")
    device, dtype = -1, torch.float32
    model = _build_pipeline(device, dtype)
48
+
49
# ===== ULTRA-FAST GENERATION =====
def generate(prompt):
    """Run greedy text generation on *prompt* and return the text.

    Returns the pipeline's ``generated_text`` field (which, per the
    transformers default, includes the prompt itself). Any failure is
    converted into a human-readable error string so the Gradio UI never
    crashes.

    Fix: the original passed ``temperature=0.1`` together with
    ``do_sample=False``; under greedy decoding the temperature is ignored
    and transformers emits a warning, so the contradictory kwarg is dropped.
    """
    try:
        return model(
            prompt,
            max_new_tokens=50,  # Optimal for speed
            do_sample=False,    # Greedy decoding: deterministic and fast
            pad_token_id=model.tokenizer.eos_token_id,
        )[0]['generated_text']
    except Exception as e:
        # Never propagate — surface the problem as the "answer" instead.
        return f"🔴 Error (but UI keeps working): {str(e)}"
62
+
63
# ===== LIGHTNING-FAST INTERFACE =====
with gr.Blocks(title="⚡ Instant AI (1-2s responses)") as demo:
    gr.Markdown("## Type anything for instant answers:")
    with gr.Row():
        prompt_box = gr.Textbox(
            placeholder="How does photosynthesis work?",
            lines=2,
            max_lines=3,
        )
    with gr.Row():
        answer_box = gr.Textbox(
            label="Answer appears here (1-2 seconds)",
            lines=5,
        )
    # Pressing Enter in the prompt box feeds it straight to generate().
    prompt_box.submit(generate, prompt_box, answer_box)
74
+
75
# ===== FAILSAFE LAUNCH =====
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # Bind all interfaces (Spaces/Docker)
        "server_port": 7860,
        "show_error": True,        # Surface tracebacks in the browser
    }
    demo.launch(**launch_options)