stt_api

Sleeping

App Files Files Community

ldhldh committed on Feb 14, 2024

Commit

46e3fab

verified ·

1 Parent(s): 698f4e5

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -118

app.py CHANGED Viewed

@@ -1,121 +1,88 @@
 import gradio as gr
-from gradio import routes
-from typing import List, Type
-import requests, os, re, asyncio, queue
-import math
 import time
-import datetime
-import requests, json
-loop = asyncio.get_event_loop()
-# Monkey patch
-def get_types(cls_set: List[Type], component: str):
-    docset = []
-    types = []
-    if component == "input":
-        for cls in cls_set:
-            doc = inspect.getdoc(cls)
-            doc_lines = doc.split("\n")
-            docset.append(doc_lines[1].split(":")[-1])
-            types.append(doc_lines[1].split(")")[0].split("(")[-1])
-    else:
-        for cls in cls_set:
-            doc = inspect.getdoc(cls)
-            doc_lines = doc.split("\n")
-            docset.append(doc_lines[-1].split(":")[-1])
-            types.append(doc_lines[-1].split(")")[0].split("(")[-1])
-    return docset, types
-routes.get_types = get_types
-# Module-level, in-memory state shared by the handler functions below.
-# user_data: id -> pw, written by register() and read by login()/chat().
-user_data = dict()
-# live_user: id -> countdown; set to 20 by login() and get_data().
-live_user = dict()
-# chat_history: list of {"name", "text", "time"} dicts appended by chat().
-chat_history = []
-def register(id, pw):
-    if not id in user_data:
-        user_data[id] = pw
-        return "ok"
-    else:
-        return "fail"
-def login(id, pw):
-    if not id in user_data:
-        return "fail"
-    else:
-        if user_data[id] != pw:
-            return "fail"
-        else:
-            live_user[id] = 20
-            return "ok"
-def chat(name, text, time):
-    if not name in user_data:
-        return "no id"
-    else:
-        chat_history.append({"name": name, "text":text, "time":time})
-    return "ok"
-def get_data(name):
-    global live_user
-    for u in live_user.keys():
-        if u == name:
-            live_user[u] = 20
-        else:
-            live_user[u] -= 1
-            if live_user[u] < 0:
-                del live_user[u]
-    return chat_history
-def get_live_user():
-    """Return the keys view of ``live_user`` (the ids currently tracked there)."""
-    return live_user.keys()
-def clear_data():
-    global chat_history
-    chat_history = []
-    return "ok"
-# Build the UI: one gr.Interface per handler function, all inside one Blocks app.
-with gr.Blocks() as demo:
-    # NOTE(review): `count` is not referenced anywhere else in this block.
-    count = 0
-    # The f-string is evaluated once, when this component is constructed, so it
-    # shows chat_history as it is at that moment.
-    gr.Markdown(
-    f"{chat_history}"
-    )
-    # chat(name, text, time)
-    aa = gr.Interface(
-      fn=chat,
-      inputs=["text", "text", "text"],
-      outputs="text",
-      description="chat",
-    )
-    # login(id, pw)
-    bb = gr.Interface(
-      fn=login,
-      inputs=["text", "text"],
-      outputs="text",
-      description="login",
-    )
-    # register(id, pw)
-    cc = gr.Interface(
-      fn=register,
-      inputs=["text", "text"],
-      outputs="text",
-      description="register",
-    )
-    # get_data(name)
-    dd = gr.Interface(
-      fn=get_data,
-      inputs=["text"],
-      outputs="text",
-      description="get_data",
-    )
-    # get_live_user() takes no inputs
-    gg = gr.Interface(
-      fn=get_live_user,
-      inputs=[],
-      outputs="text",
-      description="get_live_user",
-    )
-    # clear_data() takes no inputs
-    ss = gr.Interface(
-      fn=clear_data,
-      inputs=[],
-      outputs="text",
-      description="clear_data",
     )
-    # Enable the request queue (at most 32 pending) and start the app.
-    demo.queue(max_size=32).launch()

+import os
 import gradio as gr
+import whisper
+from whisper import tokenizer
 import time
+# Name of the Whisper model currently held in `model`; transcribe() reloads
+# the model when a different size is requested.
+current_size = 'base'
+model = whisper.load_model(current_size)
+# Sentinel dropdown value; transcribe() maps it to language=None.
+AUTO_DETECT_LANG = "Auto Detect"
+def transcribe(audio, state={}, model_size='base', delay=1.2, lang=None, translate=False):
+    time.sleep(delay - 1)
+    global current_size
+    global model
+    if model_size != current_size:
+        current_size = model_size
+        model = whisper.load_model(current_size)
+    transcription = model.transcribe(
+        audio,
+        language = lang if lang != AUTO_DETECT_LANG else None
     )
+    state['transcription'] += transcription['text'] + " "
+    if translate:
+        x = whisper.load_audio(audio)
+        x = whisper.pad_or_trim(x)
+        mel = whisper.log_mel_spectrogram(x).to(model.device)
+        options = whisper.DecodingOptions(task = "translation")
+        translation = whisper.decode(model, mel, options)
+        state['translation'] += translation.text + " "
+    return state['transcription'], state['translation'], state, f"detected language: {transcription['language']}"
+title = "OpenAI's Whisper Real-time Demo"
+description = "A simple demo of OpenAI's [**Whisper**](https://github.com/openai/whisper) speech recognition model. This demo runs on a CPU. For faster inference choose 'tiny' model size and set the language explicitly."
+model_size = gr.Dropdown(label="Model size", choices=['base', 'tiny', 'small', 'medium', 'large'], value='base')
+delay_slider = gr.inputs.Slider(minimum=1, maximum=5, default=1.2, label="Rate of transcription")
+available_languages = sorted(tokenizer.TO_LANGUAGE_CODE.keys())
+available_languages = [lang.capitalize() for lang in available_languages]
+available_languages = [AUTO_DETECT_LANG]+available_languages
+lang_dropdown = gr.inputs.Dropdown(choices=available_languages, label="Language", default=AUTO_DETECT_LANG, type="value")
+if lang_dropdown==AUTO_DETECT_LANG:
+    lang_dropdown=None
+translate_checkbox = gr.inputs.Checkbox(label="Translate to English", default=False)
+transcription_tb = gr.Textbox(label="Transcription", lines=10, max_lines=20)
+translation_tb = gr.Textbox(label="Translation", lines=10, max_lines=20)
+detected_lang = gr.outputs.HTML(label="Detected Language")
+state = gr.State({"transcription": "", "translation": ""})
+gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.Audio(source="microphone", type="filepath", streaming=True),
+        state,
+        model_size,
+        delay_slider,
+        lang_dropdown,
+        translate_checkbox
+        ],
+    outputs=[
+        transcription_tb,
+        translation_tb,
+        state,
+        detected_lang
+    ],
+    live=True,
+    allow_flagging='never',
+    title=title,
+    description=description,
+).launch(
+    # enable_queue=True,
+    # debug=True
+  )