flamiry committed on
Commit
02f6dbc
·
verified ·
1 Parent(s): 140e2c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -5,6 +5,7 @@ from datasets import load_dataset
5
  import spaces
6
  from huggingface_hub import login
7
  import os
 
8
 
9
  login(token=os.environ.get("hf_token"))
10
 
@@ -13,17 +14,18 @@ tokenizer = AutoTokenizer.from_pretrained("flamiry/first")
13
  tokenizer.pad_token = tokenizer.eos_token
14
 
15
  @spaces.GPU
16
- def train_model(number):
17
- number = int(number)
 
18
  try:
19
  dataset = load_dataset("allenai/c4", "sk", split="train", streaming=True)
20
- slovak_texts = [example['text'] for example in dataset.take(number)]
21
  inputs = tokenizer(
22
  slovak_texts,
23
  return_tensors="pt",
24
  padding=True,
25
  truncation=True,
26
- max_length=64
27
  )
28
 
29
  optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
@@ -55,10 +57,11 @@ with gr.Blocks() as demo:
55
 
56
  with gr.Tab("Train Model"):
57
  gr.Markdown("Click to train the model on Slovak data")
58
- number_input = gr.Textbox(label="Kol-vo textov")
 
59
  train_btn = gr.Button("Start Training", variant="primary")
60
  train_output = gr.Textbox(label="Result", interactive=False)
61
- train_btn.click(train_model, inputs=[number_input] ,outputs=train_output)
62
 
63
  with gr.Tab("Generate Text"):
64
  gr.Markdown("Generate Slovak text")
 
5
  import spaces
6
  from huggingface_hub import login
7
  import os
8
+ from itertools import islice
9
 
10
  login(token=os.environ.get("hf_token"))
11
 
 
14
  tokenizer.pad_token = tokenizer.eos_token
15
 
16
  @spaces.GPU
17
+ def train_model(start, end):
18
+ start = int(start)
19
+ end = int(end)
20
  try:
21
  dataset = load_dataset("allenai/c4", "sk", split="train", streaming=True)
22
+ slovak_texts = [example['text'] for example in islice(dataset, start, end)]
23
  inputs = tokenizer(
24
  slovak_texts,
25
  return_tensors="pt",
26
  padding=True,
27
  truncation=True,
28
+ max_length=512
29
  )
30
 
31
  optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
 
57
 
58
  with gr.Tab("Train Model"):
59
  gr.Markdown("Click to train the model on Slovak data")
60
+ start_input = gr.Textbox(label="Start")
61
+ end_input = gr.Textbox(label="End")
62
  train_btn = gr.Button("Start Training", variant="primary")
63
  train_output = gr.Textbox(label="Result", interactive=False)
64
+ train_btn.click(train_model, inputs=[start_input, end_input] ,outputs=train_output)
65
 
66
  with gr.Tab("Generate Text"):
67
  gr.Markdown("Generate Slovak text")