Scaryscar committed on
Commit
bba9bff
·
verified ·
1 Parent(s): 85e915a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -31
app.py CHANGED
@@ -1,44 +1,62 @@
1
- from transformers import pipeline
2
- import gradio as gr
3
  import torch
 
 
4
 
5
- # Verify GPU
6
  if not torch.cuda.is_available():
7
- raise RuntimeError("Enable GPU in Space settings")
 
 
 
 
8
 
9
- # Load model (without bitsandbytes)
10
- model = pipeline(
 
 
 
 
 
 
 
 
 
11
  "text-generation",
12
- model="google/gemma-2b-it",
13
- device=0, # Force GPU
14
- torch_dtype=torch.float16,
15
- model_kwargs={
16
- "low_cpu_mem_usage": True,
17
- "trust_remote_code": True
18
- }
19
  )
20
 
21
- def solve_math(question):
22
- prompt = f"Solve step by step:\nQ: {question}\nA:"
23
  try:
24
- result = model(
25
  prompt,
26
- max_new_tokens=100,
27
- temperature=0.3,
28
- do_sample=False
 
29
  )
30
- return result[0]['generated_text'].split("A:")[-1].strip()
31
  except Exception as e:
32
- return f"Error: {str(e)}"
33
-
34
- # Preload
35
- solve_math("2+2=")
36
 
37
- # Simple UI
38
- with gr.Blocks() as demo:
39
- gr.Markdown("## Math Solver")
40
- question = gr.Textbox(label="Problem")
41
- answer = gr.Textbox(label="Solution")
42
- question.submit(solve_math, question, answer)
 
 
 
 
 
 
43
 
44
- demo.launch()
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
2
  import torch
3
+ import gradio as gr
4
+ import os
5
 
6
# 1. GPU verification — this Space requires a CUDA device to run the model.
if not torch.cuda.is_available():
    raise RuntimeError("❌ GPU not detected! Enable GPU in Space settings.")
gpu_name = torch.cuda.get_device_name(0)
print(f"✅ Using GPU: {gpu_name}")

# 2. Model configuration — single knob for which checkpoint to serve.
MODEL_NAME = "google/gemma-2b-it"  # Try "mistralai/Mistral-7B-v0.1" for more power
13
 
14
# 3. Load model with GPU optimization.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",           # accelerate places the weights on the GPU
    torch_dtype=torch.float16,   # half precision halves GPU memory use
    low_cpu_mem_usage=True,      # stream weights in instead of a full CPU copy
)

# 4. Create the GPU-accelerated text-generation pipeline.
# NOTE: do NOT pass `device=0` here. The model above was dispatched by
# accelerate via `device_map="auto"`, and transformers raises an error
# ("The model has been loaded with accelerate and therefore cannot be
# moved to a specific device") when a device is also specified on the
# pipeline. The pipeline inherits the model's device placement.
# `torch_dtype` is likewise omitted: the model is already float16.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
31
 
32
# 5. Generation function wired to the Gradio UI.
def generate_text(prompt):
    """Run the text-generation pipeline on *prompt*.

    Returns the pipeline's full generated text (prompt included), or an
    error string so the UI displays the failure instead of crashing.
    """
    gen_kwargs = {
        "max_new_tokens": 150,
        "temperature": 0.7,
        "do_sample": True,
        "pad_token_id": tokenizer.eos_token_id,
    }
    try:
        result = pipe(prompt, **gen_kwargs)
    except Exception as exc:  # UI boundary: surface any failure as text
        return f"⚠️ Error: {str(exc)}"
    return result[0]["generated_text"]
 
 
 
45
 
46
# 6. Gradio interface: one prompt box, one output box, one button.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🚀 GPU-Powered Text Generator")
    with gr.Row():
        prompt_box = gr.Textbox(label="Your Prompt", placeholder="Type here...")
    with gr.Row():
        output_box = gr.Textbox(label="Generated Text", lines=5)
    with gr.Row():
        generate_btn = gr.Button("Generate", variant="primary")

    # Both the button and pressing Enter in the prompt box trigger generation.
    generate_btn.click(fn=generate_text, inputs=prompt_box, outputs=output_box)
    prompt_box.submit(fn=generate_text, inputs=prompt_box, outputs=output_box)
58
 
59
# 7. Launch with GPU monitoring.
if __name__ == "__main__":
    allocated_gb = torch.cuda.memory_allocated() / 1e9
    print(f"GPU Memory Allocated: {allocated_gb:.2f} GB")
    # Bind to all interfaces so the Space's reverse proxy can reach the app.
    demo.launch(server_name="0.0.0.0")