Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,26 +7,30 @@ from threading import Thread
|
|
| 7 |
torch.set_num_threads(2)
|
| 8 |
|
| 9 |
# Loading the tokenizer and model from Hugging Face's model hub.
|
| 10 |
-
tokenizer = AutoTokenizer.from_pretrained("cnmoro/
|
| 11 |
-
model = AutoModelForCausalLM.from_pretrained("cnmoro/
|
| 12 |
|
| 13 |
# using CUDA for an optimal experience
|
| 14 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 15 |
model = model.to(device)
|
| 16 |
|
|
|
|
|
|
|
|
|
|
| 17 |
# Function to generate model predictions.
|
| 18 |
def predict(message, history):
|
| 19 |
-
|
|
|
|
| 20 |
model_inputs = tokenizer([
|
| 21 |
-
|
| 22 |
], return_tensors="pt").to(device)
|
| 23 |
|
| 24 |
-
streamer = TextIteratorStreamer(tokenizer, timeout=
|
| 25 |
|
| 26 |
generate_kwargs = dict(
|
| 27 |
model_inputs,
|
| 28 |
streamer=streamer,
|
| 29 |
-
max_new_tokens=
|
| 30 |
top_p=0.2,
|
| 31 |
top_k=20,
|
| 32 |
temperature=0.1,
|
|
@@ -43,6 +47,6 @@ def predict(message, history):
|
|
| 43 |
|
| 44 |
# Setting up the Gradio chat interface.
|
| 45 |
gr.ChatInterface(predict,
|
| 46 |
-
title="
|
| 47 |
description="Pass a text to be structurized"
|
| 48 |
).launch() # Launching the web interface.
|
|
|
|
| 7 |
# Keep CPU usage modest so the Space host stays responsive.
torch.set_num_threads(2)

# Load the tokenizer and model from the Hugging Face Hub.
# (Model id extracted to a variable so it is written exactly once.)
_model_id = "cnmoro/teenytinyllama-160m-text-simplification-ptbr"
tokenizer = AutoTokenizer.from_pretrained(_model_id)
model = AutoModelForCausalLM.from_pretrained(_model_id)

# Prefer the GPU when one is present; otherwise run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
|
| 16 |
|
| 17 |
+
def count_tokens(text):
    """Return how many tokens the module-level tokenizer splits *text* into."""
    tokens = tokenizer.tokenize(text)
    return len(tokens)
|
| 19 |
+
|
| 20 |
# Function to generate model predictions.
|
| 21 |
def predict(message, history):
|
| 22 |
+
|
| 23 |
+
formatted_prompt = f"<s><system>O objetivo é comprimir e estruturar o texto a seguir<texto>{message}</texto>"
|
| 24 |
model_inputs = tokenizer([
|
| 25 |
+
formatted_prompt
|
| 26 |
], return_tensors="pt").to(device)
|
| 27 |
|
| 28 |
+
streamer = TextIteratorStreamer(tokenizer, timeout=120., skip_prompt=True, skip_special_tokens=True)
|
| 29 |
|
| 30 |
generate_kwargs = dict(
|
| 31 |
model_inputs,
|
| 32 |
streamer=streamer,
|
| 33 |
+
max_new_tokens=3072 - count_tokens(formatted_prompt),
|
| 34 |
top_p=0.2,
|
| 35 |
top_k=20,
|
| 36 |
temperature=0.1,
|
|
|
|
| 47 |
|
| 48 |
# Wire the prediction function into a Gradio chat UI and start serving it.
chat_ui = gr.ChatInterface(
    predict,
    title="TextStructurization_TeenyTinyLlama160m_CPU",
    description="Pass a text to be structurized",
)
chat_ui.launch()  # Blocks and serves the web interface.
|