mobarmg committed on
Commit
e157ec5
·
verified ·
1 Parent(s): f4753f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -4,19 +4,7 @@ from arabert.aragpt2.grover.modeling_gpt2 import GPT2LMHeadModel
4
  from transformers import AutoTokenizer
5
  import re
6
  import torch
7
- import os
8
-
9
- # For ZeroGPU on Hugging Face
10
- # Checking if we're on Hugging Face infrastructure
11
- HF_SPACE = os.environ.get("SPACE_ID") is not None
12
-
13
- # Get appropriate device for ZeroGPU
14
- if torch.cuda.is_available() and HF_SPACE:
15
- device = 0 # For ZeroGPU, use device index 0
16
- print("Using ZeroGPU on Hugging Face")
17
- else:
18
- device = -1 # CPU fallback
19
- print("Using CPU")
20
 
21
  model_name = "Naseej/AskMe-Large"
22
  tokenizer = AutoTokenizer.from_pretrained(model_name, bos_token='<|startoftext|>',
@@ -24,10 +12,16 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, bos_token='<|startoftext|>
24
  model = GPT2LMHeadModel.from_pretrained(model_name)
25
  model.resize_token_embeddings(len(tokenizer))
26
 
27
- # Configure pipeline with the device parameter for ZeroGPU
28
- generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
 
29
 
 
 
30
  def generate_response(message, history, num_beams=4, temperature=0.99, do_sample=True, top_k=60, top_p=0.9):
 
 
 
31
  prompt = f'Prompt: {message}\nAnswer:'
32
  pred_text = generator(prompt,
33
  pad_token_id=tokenizer.eos_token_id,
@@ -45,6 +39,8 @@ def generate_response(message, history, num_beams=4, temperature=0.99, do_sample
45
  except:
46
  pred_sentiment = "لم أستطع توليد إجابة. يرجى إعادة صياغة السؤال."
47
 
 
 
48
  return pred_sentiment
49
 
50
  # Properly format the chat message handler
@@ -105,8 +101,4 @@ with gr.Blocks(css=css) as demo:
105
 
106
  clear_btn.click(lambda: None, None, chatbot, queue=False)
107
 
108
- # For ZeroGPU on Hugging Face, we need to use specific launch parameters
109
- if HF_SPACE:
110
- demo.launch(share=False, server_name="0.0.0.0")
111
- else:
112
- demo.launch()
 
4
  from transformers import AutoTokenizer
5
  import re
6
  import torch
7
+ import spaces # Import the spaces module for ZeroGPU
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  model_name = "Naseej/AskMe-Large"
10
  tokenizer = AutoTokenizer.from_pretrained(model_name, bos_token='<|startoftext|>',
 
12
  model = GPT2LMHeadModel.from_pretrained(model_name)
13
  model.resize_token_embeddings(len(tokenizer))
14
 
15
+ # For ZeroGPU, we'll move the model to CUDA inside the decorated function
16
+ # Create the generator pipeline without specifying device
17
+ generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
18
 
19
+ # ZeroGPU-decorated function for text generation
20
+ @spaces.GPU(duration=60) # Set duration based on your needs
21
  def generate_response(message, history, num_beams=4, temperature=0.99, do_sample=True, top_k=60, top_p=0.9):
22
+ # Move model to CUDA inside the decorated function
23
+ generator.model = generator.model.to('cuda')
24
+
25
  prompt = f'Prompt: {message}\nAnswer:'
26
  pred_text = generator(prompt,
27
  pad_token_id=tokenizer.eos_token_id,
 
39
  except:
40
  pred_sentiment = "لم أستطع توليد إجابة. يرجى إعادة صياغة السؤال."
41
 
42
+ # Move model back to CPU to free GPU memory
43
+ generator.model = generator.model.to('cpu')
44
  return pred_sentiment
45
 
46
  # Properly format the chat message handler
 
101
 
102
  clear_btn.click(lambda: None, None, chatbot, queue=False)
103
 
104
+ demo.launch()