Spaces:

matteocirca
/

ASR-app-pro

Sleeping

App Files Community

matteocirca committed on Dec 9, 2023

Commit

8a0a2d0

1 Parent(s): e8aa27e

Update app

Browse files

Files changed (1) hide show

app.py +43 -31

app.py CHANGED Viewed

@@ -1,45 +1,57 @@
-from transformers import pipeline
 import gradio as gr
-from transformers import WhisperForConditionalGeneration
-pipe = pipeline(model="matteocirca/whisper-small-it")
-# pipe = pipeline(model="openai/whisper-small")
-# model = WhisperForConditionalGeneration.from_pretrained("matteocirca/whisper-small-it")
-segments = []
 def audio2segments(audio,word):
-    global segments
-    if not segments:
-        print(pipe(audio).keys())
-        # segments = pipe(audio)["segments"]
-        # segments = model.transcribe(audio)["segments"]
-    elif not word:
-        return "No word detected"
-    else:
         ranges = []
-        for s in segments:
-            if word in s['text'].replace(',',' , ').split(" "):
-                ranges.append((s['start'],s['end']))
-        res = ""
-        for i,r in enumerate(ranges):
-            res += f"{i}) {r[0]}-{r[1]}\n "
-        return res
-def find_segment():
-    global segments
-    res = ""
 iface = gr.Interface(
     fn=audio2segments,
-    inputs=[gr.Audio(sources=["microphone"], type="filepath"),"text"],
-    outputs="text",
     title="Whisper Small Italian",
     description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
 )
 iface.launch()

+from transformers import pipeline, WhisperModel
 import gradio as gr
+import pandas as pd
+import string
+pipe = pipeline(model="matteocirca/whisper-small-it",return_timestamps="word")
+#model = WhisperModel.from_pretrained("matteocirca/whisper-small-it")
+#pipe = pipeline(model="openai/whisper-small",return_timestamps="word")
+current_audio = None
+segments = {}
 def audio2segments(audio,word):
+    global segments,current_audio
+    if audio != current_audio or current_audio == None:
+        segments = pipe(audio)
+        current_audio = audio
+    if not word:
+        if current_audio != None:
+            return segments["text"],"<html><h1>No Word inserted!</h1></html>"
+        else:
+            return "","<html><h1>No Word inserted!</h1></html>"
+    df = pd.DataFrame(columns=["Occurrence n","Starting TimeStamp","Ending TimeStamp"])
+    if word:
+        ranges_list = []
         ranges = []
+        print(segments)
+        for w in segments['chunks']:
+            if word == w["text"].translate(str.maketrans('', '', string.punctuation)).replace(" ","").lower() :
+                ranges_list.append(w["timestamp"])
+        res = "<table><thead><tr><th>Occurrence n°</th><th>Start</th><th>End</th></tr></thead><tbody>"
+        for i,r in enumerate(ranges_list):
+            #ranges_list.append({"Occurrence n":i,"Starting TimeStamp":r[0],"Ending TimeStamp":r[1]})
+            res += f"<tr><td>{i}</td><td>{r[0]}</td><td>{r[1]}</td></tr>"
+        res+=" </tbody></table>"
+        print(res)
+        return segments["text"],res
+def clear():
+    segments = {}
 iface = gr.Interface(
     fn=audio2segments,
+    inputs=[gr.Audio(sources=["upload","microphone"], type="filepath"),"text"],
+    outputs=["text","html"],
     title="Whisper Small Italian",
     description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
 )
 iface.launch()