| import os | |
| import tempfile | |
| import torch | |
| import gradio as gr | |
| from transformers import pipeline | |
| from huggingface_hub import notebook_login, InferenceClient | |
# Model identifiers: the chat model used to organize the transcript and the
# speech-recognition model used to produce it.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"

# Batch size handed to the speech-recognition pipeline on every call.
BATCH_SIZE = 8

# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline, built once at import time and reused by
# transcribe(); audio is processed in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=AUDIO_MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
def transcribe(audio_input):
    """Transcribe an audio input to plain text with the module-level pipeline.

    Args:
        audio_input: The audio to transcribe, as received from the Gradio
            audio component (configured elsewhere in this file to pass a
            file path).

    Returns:
        The ``"text"`` entry of the pipeline result.

    Raises:
        gr.Error: If ``audio_input`` is ``None``.
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted.")

    result = pipe(
        audio_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )
    # Only the text is returned; the rest of the pipeline result is discarded.
    return result["text"]
# Shared Hugging Face Hub inference client, created with default settings;
# organize_text() selects the model on each chat_completion call.
client = InferenceClient()
def build_messages(meeting_transcript: str) -> list:
    """Build the chat messages that ask the model to organize a transcript.

    Args:
        meeting_transcript: Raw transcript text to embed in the user prompt.
            It is inserted verbatim, so braces in the transcript are safe.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts: the
        system instruction followed by the user request containing the
        transcript.
    """
    # Spelling fixed ("assitant" -> "assistant"): this text is sent to the
    # model as its system instruction.
    system_input = "You are an assistant that organizes meeting minutes."
    # Assemble the prompt from explicit lines so that source indentation can
    # never leak into the text sent to the model.
    user_input = (
        "Take this raw meeting transcript and return an organized version.\n"
        "Here is the transcript:\n"
        f"{meeting_transcript}\n"
    )
    return [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
    ]
def organize_text(meeting_transcript, *, max_tokens=250, seed=430):
    """Ask the chat model to turn a raw transcript into organized minutes.

    The messages come from ``build_messages`` and are sent through the
    module-level ``client`` to the model named by ``TEXT_MODEL_NAME``.

    Args:
        meeting_transcript: Raw transcript text to organize.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
            Defaults to the previously hard-coded 250; raise it for long
            meetings whose minutes would otherwise be cut short.
        seed: Forwarded to ``chat_completion`` as ``seed``. Defaults to the
            previously hard-coded 430.

    Returns:
        The message content of the first choice in the response.
    """
    messages = build_messages(meeting_transcript)
    response = client.chat_completion(
        messages,
        model=TEXT_MODEL_NAME,
        max_tokens=max_tokens,
        seed=seed,
    )
    return response.choices[0].message.content
def meeting_transcript_tool(audio_input):
    """Run the full pipeline: transcribe the audio, then organize the text.

    Args:
        audio_input: Audio input forwarded unchanged to ``transcribe``.

    Returns:
        The organized text returned by ``organize_text`` for the transcript.
    """
    # Two stages chained directly: speech-to-text feeds the organizing step.
    return organize_text(transcribe(audio_input))
# Gradio UI: a single audio input (handed to the callback as a file path) and
# a text output with a copy button, wired to the full transcription tool.
audio_component = gr.Audio(type="filepath")
text_component = gr.Textbox(show_copy_button=True)

demo = gr.Interface(
    fn=meeting_transcript_tool,
    inputs=audio_component,
    outputs=text_component,
    title="The Complete Meeting Transcription tool",
)

# Start the web app as soon as the script is executed.
demo.launch()