Spaces:

ms180
/

owsm_finetune

Running on A10G

App Files Files Community

ms180 committed on Jun 9, 2024

Commit

498c6f5

verified ·

1 Parent(s): e1c0f34

bug fix

Browse files

Files changed (1) hide show

app.py +35 -12

app.py CHANGED Viewed

@@ -9,23 +9,23 @@ from pathlib import Path
 import gradio as gr
-from finetune import finetune_model
 from language import languages
 from task import tasks
 import matplotlib.pyplot as plt
-os.environ['TEMP_DIR'] = tempfile.mkdtemp()
 def load_markdown():
     with open("intro.md", "r") as f:
         return f.read()
-def read_logs():
     try:
-        with open(f"output.log", "r") as f:
             return f.read()
     except:
         return None
@@ -34,7 +34,10 @@ def read_logs():
 def plot_loss_acc(temp_dir, log_every):
     sys.stdout.flush()
     lines = []
-    with open("output.log", "r") as f:
         for line in f.readlines():
             if re.match(r"^\[\d+\] - loss: \d+\.\d+ - acc: \d+\.\d+$", line):
                 lines.append(line)
@@ -68,22 +71,28 @@ def upload_file(fileobj, temp_dir):
     """
     # First check if a file is a zip file.
     if not zipfile.is_zipfile(fileobj.name):
         raise gr.Error("Please upload a zip file.")
     # Then unzip file
     shutil.unpack_archive(fileobj.name, temp_dir)
     # check zip file
     if not os.path.exists(os.path.join(temp_dir, "text")):
         raise gr.Error("Please upload a valid zip file.")
     if not os.path.exists(os.path.join(temp_dir, "text_ctc")):
         raise gr.Error("Please upload a valid zip file.")
     if not os.path.exists(os.path.join(temp_dir, "audio")):
         raise gr.Error("Please upload a valid zip file.")
     # check if all texts and audio matches
     audio_ids = []
     with open(os.path.join(temp_dir, "text"), "r") as f:
         for line in f.readlines():
@@ -100,25 +109,39 @@ def upload_file(fileobj, temp_dir):
             )
         if set(audio_ids) != set(ctc_audio_ids):
             raise gr.Error(f"`text` and `text_ctc` have different audio ids.")
     for audio_id in glob.glob(os.path.join(temp_dir, "audio", "*")):
         if not Path(audio_id).stem in audio_ids:
             raise gr.Error(f"Audio id {audio_id} is not in `text` or `text_ctc`.")
     gr.Info("Successfully uploaded and validated zip file.")
     return [fileobj]
 with gr.Blocks(title="OWSM-finetune") as demo:
-    tempdir_path = gr.State(os.environ['TEMP_DIR'])
     gr.Markdown(
         """# OWSM finetune demo!
 Finetune `owsm_v3.1_ebf_base` with your own dataset!
 Due to resource limitation, you can only train 10 epochs on maximum.
 ## Upload dataset and define settings
 """
     )
@@ -153,7 +176,7 @@ Due to resource limitation, you can only train 10 epochs on maximum.
     with gr.Row():
         with gr.Column():
             log_every = gr.Number(value=10, label="log_every", interactive=True)
-            max_epoch = gr.Slider(1, 10, step=1, label="max_epoch", interactive=True)
             scheduler = gr.Dropdown(
                 ["warmuplr"], label="warmup", value="warmuplr", interactive=True
             )
@@ -185,7 +208,7 @@ Due to resource limitation, you can only train 10 epochs on maximum.
                 max_lines=23,
                 lines=23,
             )
-            demo.load(read_logs, None, log_output, every=2)
         with gr.Column():
             log_acc = gr.Image(label="Accuracy", show_label=True, interactive=False)
@@ -241,7 +264,7 @@ Due to resource limitation, you can only train 10 epochs on maximum.
                 learning_rate,
                 weight_decay,
             ],
-            [trained_model, hyp_text]
         )
     gr.Markdown(load_markdown())

 import gradio as gr
+from finetune import finetune_model, log
 from language import languages
 from task import tasks
 import matplotlib.pyplot as plt
 def load_markdown():
     with open("intro.md", "r") as f:
         return f.read()
+def read_logs(temp_dir):
+    if not os.path.exists(f"{temp_dir}/output.log"):
+        return "Log file not found."
     try:
+        with open(f"{temp_dir}/output.log", "r") as f:
             return f.read()
     except:
         return None
 def plot_loss_acc(temp_dir, log_every):
     sys.stdout.flush()
     lines = []
+    if not os.path.exists(f"{temp_dir}/output.log"):
+        return None, None
+    with open(f"{temp_dir}/output.log", "r") as f:
         for line in f.readlines():
             if re.match(r"^\[\d+\] - loss: \d+\.\d+ - acc: \d+\.\d+$", line):
                 lines.append(line)
     """
     # First check if a file is a zip file.
     if not zipfile.is_zipfile(fileobj.name):
+        log(temp_dir, "Please upload a zip file.")
         raise gr.Error("Please upload a zip file.")
     # Then unzip file
+    log(temp_dir, "Unzipping file...")
     shutil.unpack_archive(fileobj.name, temp_dir)
     # check zip file
     if not os.path.exists(os.path.join(temp_dir, "text")):
+        log(temp_dir, "Please upload a valid zip file.")
         raise gr.Error("Please upload a valid zip file.")
     if not os.path.exists(os.path.join(temp_dir, "text_ctc")):
+        log(temp_dir, "Please upload a valid zip file.")
         raise gr.Error("Please upload a valid zip file.")
     if not os.path.exists(os.path.join(temp_dir, "audio")):
+        log(temp_dir, "Please upload a valid zip file.")
         raise gr.Error("Please upload a valid zip file.")
     # check if all texts and audio matches
+    log(temp_dir, "Checking if all texts and audio matches...")
     audio_ids = []
     with open(os.path.join(temp_dir, "text"), "r") as f:
         for line in f.readlines():
             )
         if set(audio_ids) != set(ctc_audio_ids):
+            log(temp_dir, f"`text` and `text_ctc` have different audio ids.")
             raise gr.Error(f"`text` and `text_ctc` have different audio ids.")
     for audio_id in glob.glob(os.path.join(temp_dir, "audio", "*")):
         if not Path(audio_id).stem in audio_ids:
             raise gr.Error(f"Audio id {audio_id} is not in `text` or `text_ctc`.")
+    log(temp_dir, "Successfully uploaded and validated zip file.")
     gr.Info("Successfully uploaded and validated zip file.")
     return [fileobj]
+def delete_tmp_dir(tmp_dir):
+    if os.path.exists(tmp_dir):
+        shutil.rmtree(tmp_dir)
+        print(f"Deleted temporary directory: {tmp_dir}")
+    else:
+        print("Temporary directory already deleted")
+def create_tmp_dir():
+    tmp_dir = tempfile.mkdtemp()
+    print(f"Created temporary directory: {tmp_dir}")
+    return tmp_dir
 with gr.Blocks(title="OWSM-finetune") as demo:
+    tempdir_path=gr.State(create_tmp_dir, delete_callback=delete_tmp_dir, time_to_live=600)
     gr.Markdown(
         """# OWSM finetune demo!
 Finetune `owsm_v3.1_ebf_base` with your own dataset!
 Due to resource limitation, you can only train 10 epochs on maximum.
 ## Upload dataset and define settings
 """
     )
     with gr.Row():
         with gr.Column():
             log_every = gr.Number(value=10, label="log_every", interactive=True)
+            max_epoch = gr.Slider(1, 30, step=1, label="max_epoch", interactive=True)
             scheduler = gr.Dropdown(
                 ["warmuplr"], label="warmup", value="warmuplr", interactive=True
             )
                 max_lines=23,
                 lines=23,
             )
+            demo.load(read_logs, [tempdir_path], log_output, every=2)
         with gr.Column():
             log_acc = gr.Image(label="Accuracy", show_label=True, interactive=False)
                 learning_rate,
                 weight_decay,
             ],
+            [trained_model, ref_text, base_text, hyp_text]
         )
     gr.Markdown(load_markdown())