seier-brightside committed on
Commit
1459254
·
verified ·
1 Parent(s): 48f6c2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -1,11 +1,11 @@
1
- import gradio as gr
2
  import json
3
- from span_marker import SpanMarkerModel, SpanMarkerTrainer, SpanMarkerTrainingArguments
 
4
  from datasets import Dataset
5
  from sklearn.model_selection import train_test_split
6
 
7
  def entrenar(jsonl_file):
8
- # Cargar JSONL
9
  raw = [json.loads(l) for l in jsonl_file.splitlines()]
10
 
11
  dataset = []
@@ -25,9 +25,9 @@ def entrenar(jsonl_file):
25
 
26
  # Extraer etiquetas
27
  labels = sorted(list({e["label"] for d in dataset for e in d["entities"]}))
28
- labels.insert(0, "O") # obligatorio
29
 
30
- # Train/test
31
  train, test = train_test_split(dataset, test_size=0.2, random_state=42)
32
  train_ds = Dataset.from_list(train)
33
  test_ds = Dataset.from_list(test)
@@ -38,19 +38,18 @@ def entrenar(jsonl_file):
38
  labels=labels
39
  )
40
 
41
- # Argumentos
42
  args = SpanMarkerTrainingArguments(
43
  output_dir="modelo_final",
 
44
  learning_rate=5e-5,
45
  per_device_train_batch_size=2,
46
  per_device_eval_batch_size=2,
47
- num_train_epochs=3,
48
- logging_steps=10,
49
  save_strategy="epoch",
50
- evaluation_strategy="epoch"
 
51
  )
52
 
53
- # Entrenador
54
  trainer = SpanMarkerTrainer(
55
  model=model,
56
  args=args,
@@ -60,13 +59,14 @@ def entrenar(jsonl_file):
60
 
61
  trainer.train()
62
 
63
- return "Entrenamiento completado. El modelo está en /modelo_final"
64
 
65
  ui = gr.Interface(
66
  fn=entrenar,
67
- inputs=gr.File(label="Sube tu archivo JSONL exportado de Label Studio"),
68
  outputs="text",
69
  title="Entrenamiento NER Médico con SpanMarker"
70
  )
71
 
72
  ui.launch()
 
 
1
+ import gradio as gr
2
  import json
3
+ from span_marker import SpanMarkerModel, SpanMarkerTrainer
4
+ from span_marker import SpanMarkerTrainingArguments
5
  from datasets import Dataset
6
  from sklearn.model_selection import train_test_split
7
 
8
  def entrenar(jsonl_file):
 
9
  raw = [json.loads(l) for l in jsonl_file.splitlines()]
10
 
11
  dataset = []
 
25
 
26
  # Extraer etiquetas
27
  labels = sorted(list({e["label"] for d in dataset for e in d["entities"]}))
28
+ labels.insert(0, "O")
29
 
30
+ # Datasets Hugging Face
31
  train, test = train_test_split(dataset, test_size=0.2, random_state=42)
32
  train_ds = Dataset.from_list(train)
33
  test_ds = Dataset.from_list(test)
 
38
  labels=labels
39
  )
40
 
41
+ # Args
42
  args = SpanMarkerTrainingArguments(
43
  output_dir="modelo_final",
44
+ num_train_epochs=3,
45
  learning_rate=5e-5,
46
  per_device_train_batch_size=2,
47
  per_device_eval_batch_size=2,
 
 
48
  save_strategy="epoch",
49
+ evaluation_strategy="epoch",
50
+ logging_steps=10
51
  )
52
 
 
53
  trainer = SpanMarkerTrainer(
54
  model=model,
55
  args=args,
 
59
 
60
  trainer.train()
61
 
62
+ return "¡Entrenamiento completado! Modelo guardado en /modelo_final"
63
 
64
  ui = gr.Interface(
65
  fn=entrenar,
66
+ inputs=gr.File(label="Sube tu JSONL exportado de Label Studio"),
67
  outputs="text",
68
  title="Entrenamiento NER Médico con SpanMarker"
69
  )
70
 
71
  ui.launch()
72
+