cnmoro committed on
Commit
aac0c09
·
verified ·
1 Parent(s): e4d4848

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -7,26 +7,30 @@ from threading import Thread
7
  torch.set_num_threads(2)
8
 
9
  # Loading the tokenizer and model from Hugging Face's model hub.
10
- tokenizer = AutoTokenizer.from_pretrained("cnmoro/jack-68m-text-structurization")
11
- model = AutoModelForCausalLM.from_pretrained("cnmoro/jack-68m-text-structurization")
12
 
13
  # using CUDA for an optimal experience
14
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15
  model = model.to(device)
16
 
 
 
 
17
  # Function to generate model predictions.
18
  def predict(message, history):
19
-
 
20
  model_inputs = tokenizer([
21
- f"### Structurize: {message}\n\n### Response:\n"
22
  ], return_tensors="pt").to(device)
23
 
24
- streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
25
 
26
  generate_kwargs = dict(
27
  model_inputs,
28
  streamer=streamer,
29
- max_new_tokens=512,
30
  top_p=0.2,
31
  top_k=20,
32
  temperature=0.1,
@@ -43,6 +47,6 @@ def predict(message, history):
43
 
44
  # Setting up the Gradio chat interface.
45
  gr.ChatInterface(predict,
46
- title="TextStructurization_Jack68m_CPU",
47
  description="Pass a text to be structurized"
48
  ).launch() # Launching the web interface.
 
7
  torch.set_num_threads(2)
8
 
9
  # Loading the tokenizer and model from Hugging Face's model hub.
10
+ tokenizer = AutoTokenizer.from_pretrained("cnmoro/teenytinyllama-160m-text-simplification-ptbr")
11
+ model = AutoModelForCausalLM.from_pretrained("cnmoro/teenytinyllama-160m-text-simplification-ptbr")
12
 
13
  # using CUDA for an optimal experience
14
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
15
  model = model.to(device)
16
 
17
+ def count_tokens(text):
18
+ return len(tokenizer.tokenize(text))
19
+
20
  # Function to generate model predictions.
21
  def predict(message, history):
22
+
23
+ formatted_prompt = f"<s><system>O objetivo é comprimir e estruturar o texto a seguir<texto>{message}</texto>"
24
  model_inputs = tokenizer([
25
+ formatted_prompt
26
  ], return_tensors="pt").to(device)
27
 
28
+ streamer = TextIteratorStreamer(tokenizer, timeout=120., skip_prompt=True, skip_special_tokens=True)
29
 
30
  generate_kwargs = dict(
31
  model_inputs,
32
  streamer=streamer,
33
+ max_new_tokens=3072 - count_tokens(formatted_prompt),
34
  top_p=0.2,
35
  top_k=20,
36
  temperature=0.1,
 
47
 
48
  # Setting up the Gradio chat interface.
49
  gr.ChatInterface(predict,
50
+ title="TextStructurization_TeenyTinyLlama160m_CPU",
51
  description="Pass a text to be structurized"
52
  ).launch() # Launching the web interface.