Scaryscar committed on
Commit
efdea71
·
verified ·
1 Parent(s): 9394ff4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -48
app.py CHANGED
@@ -1,29 +1,34 @@
1
- from transformers import pipeline
 
2
  import gradio as gr
3
  import torch
4
  import os
 
5
 
6
- # ===== AUTO-DEVICE CONFIGURATION =====
 
 
 
7
  def configure_device():
8
- """Smart device selection with performance optimizations"""
9
  if torch.cuda.is_available():
10
- os.environ["CUDA_VISIBLE_DEVICES"] = "0"
11
- torch.backends.cudnn.benchmark = True # Auto-tunes CUDA
12
- return 0, torch.float16 # GPU with half-precision
13
-
14
- # Optimized CPU configuration
15
- torch.set_num_threads(min(4, os.cpu_count() or 1))
16
- return -1, torch.float32
17
 
18
  device, dtype = configure_device()
19
- device_name = "GPU: " + torch.cuda.get_device_name(0) if device == 0 else "CPU"
20
  print(f"⚡ Running on: {device_name} | Precision: {dtype}")
21
 
22
- # ===== BULLETPROOF MODEL LOADING =====
 
23
  try:
 
24
  model = pipeline(
25
  task="text-generation",
26
- model="google/gemma-2b-it", # Fast 2B parameter model
 
27
  device=device,
28
  torch_dtype=dtype,
29
  model_kwargs={
@@ -31,51 +36,105 @@ try:
31
  "trust_remote_code": True
32
  }
33
  )
34
-
35
- # Pre-warm model (critical for fast first response)
36
- model("Warming up...", max_new_tokens=1)
37
-
38
  except Exception as e:
39
- # Fallback to CPU if GPU fails
40
- print(f"⚠️ GPU failed, falling back to CPU: {str(e)}")
41
- device, dtype = -1, torch.float32
42
- model = pipeline(
43
- task="text-generation",
44
- model="google/gemma-2b-it",
45
- device=device,
46
- torch_dtype=dtype
47
- )
48
 
49
- # ===== ULTRA-FAST GENERATION =====
50
- def generate(prompt):
51
- """Guaranteed fast response (1-2 seconds)"""
 
 
52
  try:
53
- return model(
54
- prompt,
55
- max_new_tokens=50, # Optimal for speed
56
- temperature=0.1, # More deterministic
57
- do_sample=False, # Disable sampling for speed
58
- pad_token_id=model.tokenizer.eos_token_id
59
- )[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  except Exception as e:
61
- return f"🔴 Error (but UI keeps working): {str(e)}"
62
 
63
- # ===== LIGHTNING-FAST INTERFACE =====
64
- with gr.Blocks(title=" Instant AI (1-2s responses)") as demo:
65
- gr.Markdown("## Type anything for instant answers:")
 
 
 
66
  with gr.Row():
67
- inp = gr.Textbox(placeholder="How does photosynthesis work?",
68
- lines=2,
69
- max_lines=3)
 
 
 
 
 
 
 
 
 
 
70
  with gr.Row():
71
- out = gr.Textbox(label="Answer appears here (1-2 seconds)",
72
- lines=5)
73
- inp.submit(generate, inp, out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- # ===== FAILSAFE LAUNCH =====
76
  if __name__ == "__main__":
77
  demo.launch(
78
  server_name="0.0.0.0",
79
  server_port=7860,
80
- show_error=True
81
  )
 
1
+ from transformers import pipeline, AutoTokenizer
2
+ from huggingface_hub import login
3
  import gradio as gr
4
  import torch
5
  import os
6
+ import time
7
 
8
# ===== AUTHENTICATION =====
# Log in only when a real token is provided (set HF_TOKEN in Space secrets).
# The previous code passed the literal placeholder "your_hf_token_here" to
# login() whenever the secret was missing, which raises an invalid-token
# error at startup. Skipping login is the correct degraded mode.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    print("⚠️ HF_TOKEN not set — gated models (e.g. Gemma) may fail to load.")
10
+
11
# ===== SMART DEVICE CONFIGURATION =====
def configure_device():
    """Pick the best available device and matching precision.

    Returns a ``(device_index, torch_dtype)`` pair: ``(0, float16)`` when a
    CUDA GPU is present, otherwise ``(-1, float32)`` for CPU inference.
    """
    if not torch.cuda.is_available():
        # CPU fallback: give intra-op parallelism every available core.
        torch.set_num_threads(os.cpu_count() or 4)
        return -1, torch.float32
    # GPU path: let cuDNN auto-tune kernels and run in half precision.
    torch.backends.cudnn.benchmark = True
    return 0, torch.float16
 
 
 
19
 
20
device, dtype = configure_device()
# Human-readable label for the startup banner.
if device == 0:
    device_name = "GPU: " + torch.cuda.get_device_name(0)
else:
    device_name = "CPU"
print(f"⚡ Running on: {device_name} | Precision: {dtype}")
23
 
24
# ===== MODEL LOADING =====
MODEL_NAME = "google/gemma-2b-it"
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = pipeline(
        task="text-generation",
        model=MODEL_NAME,
        tokenizer=tokenizer,
        device=device,
        torch_dtype=dtype,
        model_kwargs={
            # NOTE(review): Gemma ships standard architectures; this flag
            # permits executing arbitrary code from the model repo — confirm
            # it is actually needed, otherwise drop it.
            "trust_remote_code": True
        },
    )
    # Warm-up call so the first real request doesn't pay one-time setup cost.
    model("Explain 2+2:", max_new_tokens=10)
except Exception as e:
    # gr.Error is meant to be raised inside Gradio event handlers; at module
    # import time it just produces an opaque traceback. Raise a RuntimeError
    # with the original exception chained so the real cause is visible.
    raise RuntimeError(
        f"""🚨 Model loading failed. Ensure:
1. You accepted Gemma's terms at: https://huggingface.co/{MODEL_NAME}
2. HF_TOKEN is set in Space secrets
Error: {str(e)}"""
    ) from e
 
 
 
 
 
46
 
47
# ===== ENHANCED GENERATION =====
def generate_with_explanation(prompt):
    """Generate a step-by-step explanation for *prompt* with timing stats.

    Returns the model's answer followed by a generation-time footer, or an
    error string (so the UI keeps working) if generation fails.
    """
    start_time = time.time()

    try:
        # Wrap the question in an instruction template that nudges the model
        # toward detailed, stepwise answers.
        enhanced_prompt = f"""Explain step-by-step in detail:

Question: {prompt}

Answer:"""

        # Generation with optimized parameters
        output = model(
            enhanced_prompt,
            max_new_tokens=150,
            temperature=0.3,
            top_k=50,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

        # The pipeline echoes the prompt; keep only the text after "Answer:".
        full_response = output[0]['generated_text']
        answer = full_response.split("Answer:")[-1].strip()

        # Performance metrics. max(...) guards the division against a zero
        # elapsed time (previously a possible ZeroDivisionError that the
        # broad except below would mask as a generation error). "tokens/sec"
        # is approximated by whitespace-split word count, not real tokens.
        gen_time = time.time() - start_time
        tokens_sec = len(answer.split()) / max(gen_time, 1e-6)

        return f"""{answer}

⏱️ Generated in {gen_time:.2f}s ({tokens_sec:.1f} tokens/sec)"""

    except Exception as e:
        # Broad catch is deliberate: any failure becomes a visible message
        # in the output box instead of crashing the Gradio event handler.
        return f" Error: {str(e)}"
84
 
85
# ===== ADVANCED INTERFACE =====
with gr.Blocks(theme=gr.themes.Soft(), title="🧠 AI Tutor with Step-by-Step Explanations") as demo:
    # Page header
    gr.Markdown("""<h1><center>Step-by-Step AI Tutor</center></h1>""")

    # Question entry
    with gr.Row():
        question_box = gr.Textbox(
            label="Ask anything",
            placeholder="E.g. 'Explain quantum computing basics'",
            lines=3,
            max_lines=5
        )

    # Action buttons
    with gr.Row():
        ask_button = gr.Button("Generate Explanation", variant="primary")
        reset_button = gr.Button("Clear")

    # Model answer
    with gr.Row():
        answer_box = gr.Textbox(
            label="Detailed Explanation",
            lines=10,
            interactive=False
        )

    # Clickable sample prompts
    gr.Examples(
        examples=[
            "Explain how photosynthesis works step by step",
            "Solve 3x + 5 = 20 showing each step",
            "Describe the water cycle with bullet points"
        ],
        inputs=question_box
    )

    # Wire events: generate on click; clearing blanks both boxes.
    ask_button.click(
        fn=generate_with_explanation,
        inputs=question_box,
        outputs=answer_box
    )
    reset_button.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[question_box, answer_box]
    )
133
 
134
# ===== LAUNCH =====
if __name__ == "__main__":
    # Bind on all interfaces so the Space's reverse proxy can reach the app;
    # 7860 is the port Hugging Face Spaces expects.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )