loocorez committed on
Commit
3a3e45f
·
verified ·
1 Parent(s): 14ad9ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -29
app.py CHANGED
@@ -1,6 +1,6 @@
 
1
  import re, spaces, gradio as gr, torch
2
- from transformers import AutoTokenizer, AutoModelForCausalLM # ⟡ swap import
3
- # from peft import AutoPeftModelForCausalLM (remove)
4
 
5
  MODEL_NAME = "loocorez/reverse-text-warmup"
6
 
@@ -12,50 +12,64 @@ Respond in the following format:
12
  <answer>...</answer>
13
  </response>"""
14
 
 
15
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
16
  tokenizer.pad_token = tokenizer.eos_token
17
 
18
- # load the **full model** directly
19
- model = AutoModelForCausalLM.from_pretrained( # ⟡ use AutoModel
20
  MODEL_NAME,
21
  torch_dtype=torch.float16
22
- ).eval()
23
 
24
  EOS_ID = tokenizer.encode("</response>", add_special_tokens=False)[0]
25
 
26
- @spaces.GPU(duration=60)
27
- def reverse_text(user_text: str,
28
- temperature: float = 0.0,
29
- max_tokens: int = 256):
 
 
 
30
  prompt = f"{SYSTEM_PROMPT}\n\n{user_text.strip()}"
31
- inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
32
-
33
- with torch.no_grad():
34
- out = model.generate(
35
- **inputs,
36
- max_new_tokens=max_tokens,
37
- do_sample=temperature > 0,
38
- temperature=max(temperature, 1e-6),
39
- eos_token_id=EOS_ID,
40
- pad_token_id=tokenizer.eos_token_id,
41
- repetition_penalty=1.1,
42
- )
43
-
44
- gen = tokenizer.decode(out[0][inputs["input_ids"].size(1):],
45
- skip_special_tokens=True)
46
- m = re.search(r"<answer>(.*?)</answer>", gen, re.S)
47
- return m.group(1).strip() if m else gen.strip()
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  demo = gr.Interface(
50
  fn=reverse_text,
51
  inputs=[
52
- gr.Textbox(label="Input Text", lines=3),
53
  gr.Slider(0.0, 1.0, step=0.05, value=0.0, label="Temperature"),
54
- gr.Slider(32, 512, step=32, value=256, label="Max new tokens")
55
  ],
56
  outputs=gr.Textbox(label="Reversed Text", lines=3),
57
  title="🔄 Reverse Text Model Demo",
58
- description="Give it a sentence; it returns the reversed version."
59
  )
60
 
61
  if __name__ == "__main__":
 
1
+ # app.py
2
  import re, spaces, gradio as gr, torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
4
 
5
  MODEL_NAME = "loocorez/reverse-text-warmup"
6
 
 
12
  <answer>...</answer>
13
  </response>"""
14
 
15
+ # ── load model & tokenizer once ────────────────────────────────────────────────
16
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
17
  tokenizer.pad_token = tokenizer.eos_token
18
 
19
+ model = AutoModelForCausalLM.from_pretrained(
 
20
  MODEL_NAME,
21
  torch_dtype=torch.float16
22
+ ).eval() # keep weights on CPU until each call
23
 
24
  EOS_ID = tokenizer.encode("</response>", add_special_tokens=False)[0]
25
 
26
+
27
+ @spaces.GPU(duration=60) # ZeroGPU reserves a card only inside this fn
28
+ def reverse_text(
29
+ user_text: str,
30
+ temperature: float = 0.0,
31
+ max_tokens: int = 256,
32
+ ) -> str:
33
  prompt = f"{SYSTEM_PROMPT}\n\n{user_text.strip()}"
34
+
35
+ # ── move model to GPU for this request ────────────────────────────────────
36
+ model.to("cuda")
37
+
38
+ try:
39
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
40
+
41
+ with torch.no_grad():
42
+ out = model.generate(
43
+ **inputs,
44
+ max_new_tokens=max_tokens,
45
+ do_sample=temperature > 0,
46
+ temperature=max(temperature, 1e-6),
47
+ eos_token_id=EOS_ID,
48
+ pad_token_id=tokenizer.eos_token_id,
49
+ repetition_penalty=1.1,
50
+ )
51
+
52
+ gen = tokenizer.decode(out[0][inputs["input_ids"].size(1):],
53
+ skip_special_tokens=True)
54
+ m = re.search(r"<answer>(.*?)</answer>", gen, re.S)
55
+ return m.group(1).strip() if m else gen.strip()
56
+
57
+ finally:
58
+ # ── free the GPU so the next visitor can use it ───────────────────────
59
+ model.to("cpu")
60
+ torch.cuda.empty_cache()
61
+
62
 
63
  demo = gr.Interface(
64
  fn=reverse_text,
65
  inputs=[
66
+ gr.Textbox(label="Input Text", lines=3, placeholder="Enter text…"),
67
  gr.Slider(0.0, 1.0, step=0.05, value=0.0, label="Temperature"),
68
+ gr.Slider(32, 512, step=32, value=256, label="Max new tokens"),
69
  ],
70
  outputs=gr.Textbox(label="Reversed Text", lines=3),
71
  title="🔄 Reverse Text Model Demo",
72
+ description="Give it a sentence; it returns the reversed version.",
73
  )
74
 
75
  if __name__ == "__main__":