| | import os |
| | import torch |
| | import gradio as gr |
| | from transformers import pipeline |
| | from huggingface_hub import InferenceClient |
| |
|
| | |
| | |
| | |
# Run on the first CUDA device when one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition checkpoint and the batch size used when calling it.
AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
BATCH_SIZE = 8

# Shared ASR pipeline, built once at import time and reused for every request.
pipe = pipeline(
    model=AUDIO_MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)
| |
|
def transcribe(audio_input):
    """Transcribe an audio input to plain text with the module-level ASR pipeline.

    Args:
        audio_input: The audio handed over by the UI; passed to ``pipe`` as-is.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``audio_input`` is ``None`` (nothing was submitted).
    """
    if audio_input is None:
        raise gr.Error("No audio file submitted!")

    # Timestamps are requested from the pipeline, but only the text is returned.
    asr_options = {
        "batch_size": BATCH_SIZE,
        "generate_kwargs": {"task": "transcribe"},
        "return_timestamps": True,
    }
    result = pipe(audio_input, **asr_options)
    return result["text"]
| |
|
| | |
| | |
| | |
# Chat model asked to organize the raw transcript.
TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

# The token is read from the environment; stop at startup when it is absent
# or empty rather than failing later on the first request.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN not found! Add it as a secret in your Space settings.")

# Client used for all chat-completion requests.
client = InferenceClient(
    base_url="https://api-inference.huggingface.co",
    token=hf_token,
)
| |
|
def build_messages(meeting_transcript) -> list:
    """Build the chat messages that ask the model to organize a transcript.

    Args:
        meeting_transcript: Raw transcript text to embed in the user prompt.
            It is converted with ``str()`` and stripped of surrounding
            whitespace before being inserted.

    Returns:
        A two-item list of ``{"role": ..., "content": ...}`` dicts: the
        system message first, then the user message containing the
        instructions followed by the transcript.
    """
    system_input = "You are an assistant that organizes meeting minutes."

    # Assemble the prompt from explicit lines instead of a triple-quoted
    # f-string inside the function body: that form puts a leading newline and
    # whatever indentation the source uses into the text sent to the model.
    # (textwrap.dedent is not a safe alternative here, because an interpolated
    # multi-line transcript would defeat the common-prefix detection.)
    prompt_lines = (
        "Take this raw meeting transcript and return an organized, sectioned version.",
        "You may include a summary at the top.",
        "",
        "Transcript:",
        str(meeting_transcript).strip(),
    )
    user_input = "\n".join(prompt_lines)

    return [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
    ]
| |
|
def organize_text(meeting_transcript, *, max_tokens=300, seed=42):
    """Ask the chat model to turn a raw transcript into organized minutes.

    Args:
        meeting_transcript: Raw transcript text.
        max_tokens: Upper bound on generated tokens, forwarded to
            ``client.chat_completion``. Defaults to the previously
            hard-coded 300; raise it for long meetings whose organized
            output would otherwise be cut off.
        seed: Sampling seed forwarded to ``client.chat_completion``.

    Returns:
        The content of the first returned choice, or ``""`` when the
        response carries no content.

    Raises:
        gr.Error: If the transcript is ``None`` or contains only whitespace.
    """
    # A blank transcript (e.g. silent audio) gives the model nothing to
    # organize; surface that in the UI instead of spending a remote call.
    if meeting_transcript is None or not str(meeting_transcript).strip():
        raise gr.Error("The transcript is empty, so there is nothing to organize.")

    messages = build_messages(meeting_transcript)
    response = client.chat_completion(
        messages, model=TEXT_MODEL_NAME, max_tokens=max_tokens, seed=seed
    )
    # Guard against a missing content field so callers always get a string.
    return response.choices[0].message.content or ""
| |
|
| | |
| | |
| | |
def meeting_transcript_tool(audio_input):
    """Run the whole flow: transcribe the audio, then organize the transcript.

    Args:
        audio_input: The audio handed over by the UI; forwarded to ``transcribe``.

    Returns:
        The organized text produced by ``organize_text``.
    """
    return organize_text(transcribe(audio_input))
| |
|
| | |
| | |
| | |
# UI components: the audio is handed to the callback as a file path, and the
# result is shown in a textbox with a copy button.
audio_component = gr.Audio(type="filepath")
transcript_component = gr.Textbox(label="Organized Transcript", show_copy_button=True)

# Single-function interface wired to the transcription + organization flow.
demo = gr.Interface(
    title="🪶 Meeting Transcription Tool",
    description="Upload or record an audio file. This app transcribes it using Whisper and organizes the text using Phi-3",
    fn=meeting_transcript_tool,
    inputs=audio_component,
    outputs=transcript_component,
)

demo.launch()
| |
|