Scaryscar committed on
Commit
e79949d
·
verified ·
1 Parent(s): d1ad47d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -19
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  import torch
4
  import os
5
 
6
- # ===== AUTO DEVICE CONFIGURATION =====
7
  def get_best_device():
8
  if torch.cuda.is_available():
9
  torch.backends.cudnn.benchmark = True
@@ -11,47 +11,53 @@ def get_best_device():
11
  return -1, torch.float32 # CPU fallback
12
 
13
  device, dtype = get_best_device()
14
- device_name = torch.cuda.get_device_name(0) if device == 0 else "CPU"
15
  print(f"⚡ Running on: {device_name}")
16
 
17
- # ===== OPTIMIZED MODEL LOADING =====
18
  try:
 
19
  model = pipeline(
20
- task="text-generation",
21
  model="google/gemma-2b-it",
22
  device=device,
23
  torch_dtype=dtype,
24
  model_kwargs={
25
  "low_cpu_mem_usage": True,
26
- "trust_remote_code": True # Moved here to fix the error
27
  }
28
  )
 
29
  # Pre-warm model
30
  model("Warmup", max_new_tokens=1)
 
31
  except Exception as e:
32
- raise RuntimeError(f"❌ Model loading failed. Check your GPU settings.\nError: {str(e)}")
 
 
 
 
 
 
33
 
34
- # ===== ULTRA-FAST GENERATION =====
35
  def generate(prompt):
36
  try:
37
  return model(
38
  prompt,
39
- max_new_tokens=60, # Optimal for speed
40
  temperature=0.2,
41
  do_sample=False,
42
  pad_token_id=model.tokenizer.eos_token_id
43
  )[0]['generated_text']
44
  except Exception as e:
45
- return f"⚠️ Error (but UI keeps working): {str(e)}"
46
 
47
- # ===== BULLETPROOF INTERFACE =====
48
- with gr.Blocks(title="⚡ Lightning AI (1-2sec responses)") as demo:
49
- gr.Markdown("## Ask anything, get instant answers")
50
- with gr.Row():
51
- inp = gr.Textbox(placeholder="Type here...", label="Input")
52
- with gr.Row():
53
- out = gr.Textbox(label="Instant Answer", interactive=False)
54
- inp.submit(generate, inp, out)
55
 
56
- if __name__ == "__main__":
57
- demo.launch(server_name="0.0.0.0")
 
3
  import torch
4
  import os
5
 
6
+ # ===== SMART DEVICE CONFIGURATION =====
7
  def get_best_device():
8
  if torch.cuda.is_available():
9
  torch.backends.cudnn.benchmark = True
 
11
  return -1, torch.float32 # CPU fallback
12
 
13
  device, dtype = get_best_device()
14
+ device_name = "GPU: " + torch.cuda.get_device_name(0) if device == 0 else "CPU"
15
  print(f"⚡ Running on: {device_name}")
16
 
17
+ # ===== ERROR-PROOF MODEL LOADING =====
18
  try:
19
+ # Correct pipeline configuration (fixed trust_remote_code)
20
  model = pipeline(
21
+ "text-generation",
22
  model="google/gemma-2b-it",
23
  device=device,
24
  torch_dtype=dtype,
25
  model_kwargs={
26
  "low_cpu_mem_usage": True,
27
+ "trust_remote_code": True # Correct placement
28
  }
29
  )
30
+
31
  # Pre-warm model
32
  model("Warmup", max_new_tokens=1)
33
+
34
  except Exception as e:
35
+ # Simplified fallback (removes duplicate trust_remote_code)
36
+ model = pipeline(
37
+ "text-generation",
38
+ model="google/gemma-2b-it",
39
+ device=device,
40
+ torch_dtype=dtype
41
+ )
42
 
43
+ # ===== OPTIMIZED GENERATION =====
44
  def generate(prompt):
45
  try:
46
  return model(
47
  prompt,
48
+ max_new_tokens=60,
49
  temperature=0.2,
50
  do_sample=False,
51
  pad_token_id=model.tokenizer.eos_token_id
52
  )[0]['generated_text']
53
  except Exception as e:
54
+ return f"⚠️ Error: {str(e)}"
55
 
56
+ # ===== SIMPLE INTERFACE =====
57
+ with gr.Blocks() as demo:
58
+ gr.Markdown("## Ask anything (1-2 second responses)")
59
+ input = gr.Textbox(label="Your question")
60
+ output = gr.Textbox(label="Answer")
61
+ input.submit(generate, input, output)
 
 
62
 
63
+ demo.launch(server_name="0.0.0.0")