loocorez committed on
Commit
c7b71c4
·
verified ·
1 Parent(s): c2f2a8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -13
app.py CHANGED
@@ -1,27 +1,76 @@
1
  import spaces
2
  import gradio as gr
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
# Load your model once at import time so every request reuses it.
model_name = "loocorez/reverse-text-warmup"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# NOTE(review): moves the model to CUDA unconditionally at import time —
# this crashes on CPU-only hosts; confirm the deployment target always has a GPU.
model.to("cuda")
10
 
 
 
 
 
 
 
 
11
def reverse_text(input_text):
    """Run the reverse-text model on *input_text* and return the decoded output.

    Args:
        input_text: Prompt string fed to the causal LM.

    Returns:
        The full decoded generation (prompt included), with special
        tokens stripped.
    """
    # Encode the prompt and move the tensors onto the model's device.
    # The original left them on CPU while the model had been moved to
    # CUDA at module load, so generate() raised a device-mismatch
    # RuntimeError on every call.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=100)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
# Wire the model function into a simple Gradio UI.
text_in = gr.Textbox(label="Input Text")
text_out = gr.Textbox(label="Reversed Text")

demo = gr.Interface(
    fn=reverse_text,
    inputs=text_in,
    outputs=text_out,
    title="Reverse Text Model Demo",
    description="Test the reverse-text-warmup model",
)

demo.launch()
 
1
  import spaces
2
  import gradio as gr
3
+ import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
 
6
# Load tokenizer globally (CPU operation)
model_name = "loocorez/reverse-text-warmup"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model globally but keep on CPU initially; the @spaces.GPU handler
# moves it to CUDA per request.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # Use half precision for memory efficiency.
    # NOTE(review): fp16 weights sit on CPU between requests; some CPU ops
    # don't support float16 — confirm nothing runs inference before the
    # model is moved to CUDA.
    torch_dtype=torch.float16
)
15
+
16
@spaces.GPU(duration=60)  # Reserve the ZeroGPU slice for up to 60 seconds
def reverse_text(input_text):
    """Generate a continuation of *input_text* with the reverse-text model.

    Args:
        input_text: Prompt string from the Gradio textbox.

    Returns:
        Only the newly generated text (prompt removed), or an
        "Error: ..." string if inference fails.
    """
    # Move model to GPU only while this request runs.
    model.to("cuda")

    try:
        # Tokenize and move to GPU; prompts longer than 512 tokens are truncated.
        inputs = tokenizer(
            input_text,
            return_tensors="pt",
            max_length=512,
            truncation=True
        ).to("cuda")

        # Number of prompt tokens — used below to strip the prompt exactly.
        prompt_len = inputs["input_ids"].shape[1]

        # Inference only: no gradient tracking.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Strip the prompt at the *token* level. The original sliced the
        # decoded string with result[len(input_text):], which mis-slices
        # whenever the tokenize/decode round trip does not reproduce the
        # input character-for-character (truncation at 512 tokens,
        # whitespace normalization, special tokens) — token-index slicing
        # is exact.
        generated_ids = outputs[0][prompt_len:]
        generated_text = tokenizer.decode(
            generated_ids, skip_special_tokens=True
        ).strip()

        return generated_text

    except Exception as e:
        # Best-effort demo: surface the failure in the output box instead
        # of crashing the Space.
        return f"Error: {str(e)}"

    finally:
        # Move model back to CPU and release cached GPU memory so the
        # ZeroGPU allocation can be reclaimed.
        model.to("cpu")
        torch.cuda.empty_cache()
54
 
55
# Create interface: build the components first, then assemble the demo.
prompt_box = gr.Textbox(
    label="Input Text",
    placeholder="Enter text to process...",
    lines=3
)
result_box = gr.Textbox(
    label="Generated Text",
    lines=3
)

demo = gr.Interface(
    fn=reverse_text,
    inputs=prompt_box,
    outputs=result_box,
    title="🔄 Reverse Text Model Demo",
    description="Test your custom reverse-text-warmup model using ZeroGPU",
    examples=[
        ["Hello world"],
        ["The quick brown fox"],
        ["Machine learning is amazing"]
    ]
)

demo.launch()