Update app.py
Browse files
app.py
CHANGED
|
@@ -4,19 +4,7 @@ from arabert.aragpt2.grover.modeling_gpt2 import GPT2LMHeadModel
|
|
| 4 |
from transformers import AutoTokenizer
|
| 5 |
import re
|
| 6 |
import torch
|
| 7 |
-
import os
|
| 8 |
-
|
| 9 |
-
# For ZeroGPU on Hugging Face
|
| 10 |
-
# Checking if we're on Hugging Face infrastructure
|
| 11 |
-
HF_SPACE = os.environ.get("SPACE_ID") is not None
|
| 12 |
-
|
| 13 |
-
# Get appropriate device for ZeroGPU
|
| 14 |
-
if torch.cuda.is_available() and HF_SPACE:
|
| 15 |
-
device = 0 # For ZeroGPU, use device index 0
|
| 16 |
-
print("Using ZeroGPU on Hugging Face")
|
| 17 |
-
else:
|
| 18 |
-
device = -1 # CPU fallback
|
| 19 |
-
print("Using CPU")
|
| 20 |
|
| 21 |
model_name = "Naseej/AskMe-Large"
|
| 22 |
tokenizer = AutoTokenizer.from_pretrained(model_name, bos_token='<|startoftext|>',
|
|
@@ -24,10 +12,16 @@ tokenizer = AutoTokenizer.from_pretrained(model_name, bos_token='<|startoftext|>
|
|
| 24 |
model = GPT2LMHeadModel.from_pretrained(model_name)
|
| 25 |
model.resize_token_embeddings(len(tokenizer))
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
|
|
|
|
| 29 |
|
|
|
|
|
|
|
| 30 |
def generate_response(message, history, num_beams=4, temperature=0.99, do_sample=True, top_k=60, top_p=0.9):
|
|
|
|
|
|
|
|
|
|
| 31 |
prompt = f'Prompt: {message}\nAnswer:'
|
| 32 |
pred_text = generator(prompt,
|
| 33 |
pad_token_id=tokenizer.eos_token_id,
|
|
@@ -45,6 +39,8 @@ def generate_response(message, history, num_beams=4, temperature=0.99, do_sample
|
|
| 45 |
except:
|
| 46 |
pred_sentiment = "لم أستطع توليد إجابة. يرجى إعادة صياغة السؤال."
|
| 47 |
|
|
|
|
|
|
|
| 48 |
return pred_sentiment
|
| 49 |
|
| 50 |
# Properly format the chat message handler
|
|
@@ -105,8 +101,4 @@ with gr.Blocks(css=css) as demo:
|
|
| 105 |
|
| 106 |
clear_btn.click(lambda: None, None, chatbot, queue=False)
|
| 107 |
|
| 108 |
-
|
| 109 |
-
if HF_SPACE:
|
| 110 |
-
demo.launch(share=False, server_name="0.0.0.0")
|
| 111 |
-
else:
|
| 112 |
-
demo.launch()
|
|
|
|
| 4 |
from transformers import AutoTokenizer
|
| 5 |
import re
|
| 6 |
import torch
|
| 7 |
+
import spaces # Import the spaces module for ZeroGPU
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
model_name = "Naseej/AskMe-Large"
|
| 10 |
tokenizer = AutoTokenizer.from_pretrained(model_name, bos_token='<|startoftext|>',
|
|
|
|
| 12 |
model = GPT2LMHeadModel.from_pretrained(model_name)
|
| 13 |
model.resize_token_embeddings(len(tokenizer))
|
| 14 |
|
| 15 |
+
# For ZeroGPU, we'll move the model to CUDA inside the decorated function
|
| 16 |
+
# Create the generator pipeline without specifying device
|
| 17 |
+
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
| 18 |
|
| 19 |
+
# ZeroGPU-decorated function for text generation
|
| 20 |
+
@spaces.GPU(duration=60) # Set duration based on your needs
|
| 21 |
def generate_response(message, history, num_beams=4, temperature=0.99, do_sample=True, top_k=60, top_p=0.9):
|
| 22 |
+
# Move model to CUDA inside the decorated function
|
| 23 |
+
generator.model = generator.model.to('cuda')
|
| 24 |
+
|
| 25 |
prompt = f'Prompt: {message}\nAnswer:'
|
| 26 |
pred_text = generator(prompt,
|
| 27 |
pad_token_id=tokenizer.eos_token_id,
|
|
|
|
| 39 |
except:
|
| 40 |
pred_sentiment = "لم أستطع توليد إجابة. يرجى إعادة صياغة السؤال."
|
| 41 |
|
| 42 |
+
# Move model back to CPU to free GPU memory
|
| 43 |
+
generator.model = generator.model.to('cpu')
|
| 44 |
return pred_sentiment
|
| 45 |
|
| 46 |
# Properly format the chat message handler
|
|
|
|
| 101 |
|
| 102 |
clear_btn.click(lambda: None, None, chatbot, queue=False)
|
| 103 |
|
| 104 |
+
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|