| import dash |
| from dash import dcc, html, Input, Output, State, callback_context |
| import dash_bootstrap_components as dbc |
| import os |
| import tempfile |
| import base64 |
| import openai |
| import docx |
| from datetime import datetime |
| import threading |
| import time |
| import google.generativeai as genai |
| from anthropic import Anthropic |
| import requests |
| import uuid |
| import flask |
| import shutil |
| import logging |
| from collections import defaultdict |
| from moviepy import * |
|
|
# Root logger: INFO and above, each line stamped with time and level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


# OpenAI: the key is read from the environment and stored on the module.
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    logging.warning("OPENAI_API_KEY not set. Transcription will fail.")


# Google Gemini: the module name `genai` is rebound to None whenever the
# backend is unusable, so the rest of the file can test `if not genai`.
google_api_key = os.getenv("GOOGLE_API_KEY")
if not google_api_key:
    logging.warning("GOOGLE_API_KEY not set. Gemini model will not be available.")
    genai = None
else:
    try:
        genai.configure(api_key=google_api_key)
    except Exception as exc:
        genai = None
        logging.error(f"Failed to configure Google Gemini: {exc}")


# Anthropic: `anthropic` is the client instance, or None when unavailable.
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
anthropic = None
if anthropic_api_key:
    try:
        anthropic = Anthropic(api_key=anthropic_api_key)
    except Exception as exc:
        logging.error(f"Failed to initialize Anthropic client: {exc}")
else:
    logging.warning("ANTHROPIC_API_KEY not set. Claude model will not be available.")


# Key used for the 'grok' model option (sent to the Groq endpoint below).
grok_api_key = os.getenv("GROK_API_KEY")
if not grok_api_key:
    logging.warning("GROK_API_KEY not set. Groq model will not be available.")
|
|
# Flask server that hosts the Dash application.
server = flask.Flask(__name__)
app = dash.Dash(
    __name__,
    server=server,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    suppress_callback_exceptions=True,
)


# Per-session server-side state, keyed by session id. A missing id yields a
# fresh record whose fields are all None.
session_data = defaultdict(
    lambda: dict.fromkeys(
        ("audio_path", "transcript", "minutes", "diarized", "temp_dir", "original_filename")
    )
)
# One lock per session id, created on first access.
session_locks = defaultdict(threading.Lock)
|
|
def get_session_dir(session_id):
    """Return the temp directory of ``session_id``, creating it if needed.

    A new directory is made with ``tempfile.mkdtemp`` when the session has no
    recorded directory or the recorded path no longer exists on disk. The new
    path is stored under the session's ``"temp_dir"`` key.
    """
    record = session_data[session_id]
    known_dir = record["temp_dir"]
    if known_dir is not None and os.path.exists(known_dir):
        return known_dir

    fresh_dir = tempfile.mkdtemp(prefix=f"session_{session_id}_")
    record["temp_dir"] = fresh_dir
    logging.info(f"Created temp directory for session {session_id}: {fresh_dir}")
    return fresh_dir
|
|
def cleanup_session(session_id):
    """Delete the session's temp directory and forget its server-side state.

    Runs while holding the session's lock. A failure to remove the directory
    is logged and does not prevent the in-memory entries from being dropped.
    """
    with session_locks[session_id]:
        logging.info(f"Cleaning up session: {session_id}")
        temp_dir = session_data[session_id].get("temp_dir")
        if temp_dir and os.path.exists(temp_dir):
            try:
                shutil.rmtree(temp_dir)
            except Exception as exc:
                logging.error(f"Error removing directory {temp_dir}: {exc}")
            else:
                logging.info(f"Removed temp directory: {temp_dir}")
        # Drop both registry entries; pop() tolerates an id that is already gone.
        session_data.pop(session_id, None)
        session_locks.pop(session_id, None)
        logging.info(f"Session data cleared for {session_id}")
|
|
def save_base64_data(content_string, file_path):
    """Decode a ``"<type>,<base64>"`` upload string and write it to ``file_path``.

    Args:
        content_string: Upload payload in ``"type,base64_data"`` form; it must
            contain exactly one comma.
        file_path: Destination path, opened in binary write mode.

    Returns:
        ``file_path`` on success, ``None`` on any failure (the reason is logged).
    """
    # Local import keeps this block self-contained; the file header does not
    # import binascii directly.
    import binascii

    try:
        logging.info(f"Decoding base64 data for {file_path}")

        try:
            _content_type, encoded = content_string.split(',')
        except ValueError as e:
            logging.error(f"Error splitting content string: {e}. String might not be in 'type,base64_data' format.")
            return None

        # binascii.Error is a subclass of ValueError, so decoding is handled in
        # its own step; sharing one try with the split would report decode
        # failures as split failures.
        try:
            data_bytes = base64.b64decode(encoded)
        except (binascii.Error, ValueError) as e:
            logging.error(f"Error decoding base64: {e}")
            return None

        with open(file_path, 'wb') as f:
            f.write(data_bytes)
        logging.info(f"Saved uploaded data to {file_path}")
        return file_path
    except Exception as e:
        # Anything else: non-string input, unwritable path, disk errors, ...
        logging.error(f"Error saving base64 data: {e}")
        return None
|
|
def extract_audio_from_video(video_path, audio_output_path):
    """Write the audio track of ``video_path`` to ``audio_output_path`` as MP3.

    Args:
        video_path: Path of the source video file.
        audio_output_path: Path of the audio file to create.

    Returns:
        ``audio_output_path`` on success, ``None`` on failure. Failures are
        logged and any partially written output file is removed.
    """
    video = None
    try:
        logging.info(f"Extracting audio from {video_path} to {audio_output_path}")
        video = VideoFileClip(video_path)
        # A clip without an audio track exposes `audio` as None; report that
        # explicitly instead of failing with an opaque AttributeError.
        if video.audio is None:
            logging.error(f"Error extracting audio from {video_path}: video has no audio track")
            return None
        video.audio.write_audiofile(audio_output_path, codec='mp3')
        logging.info(f"Successfully extracted audio to {audio_output_path}")
        return audio_output_path
    except Exception as e:
        logging.error(f"Error extracting audio from {video_path}: {e}")
        # Best-effort removal of a partial output; a cleanup failure must not
        # escape and mask the original problem.
        try:
            if os.path.exists(audio_output_path):
                os.remove(audio_output_path)
        except OSError as cleanup_error:
            logging.warning(f"Could not remove partial audio file {audio_output_path}: {cleanup_error}")
        return None
    finally:
        # Always release the clip's resources, on success and failure alike.
        if video is not None:
            try:
                video.close()
            except Exception as close_error:
                logging.warning(f"Could not close video clip {video_path}: {close_error}")
|
|
def transcribe_audio(file_path):
    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

    Args:
        file_path: Path of the audio file to send for transcription.

    Returns:
        The transcript text on success. On failure, a human-readable message
        that always starts with ``"Error:"``; callers detect failures by
        looking for that marker, so every failure path must carry it.
    """
    logging.info(f"Starting transcription for {file_path}")
    if not openai.api_key:
        return "Error: OpenAI API key not configured."
    if not os.path.exists(file_path):
        logging.error(f"Transcription failed: File not found at {file_path}")
        return "Error: Audio file not found for transcription."

    try:
        with open(file_path, "rb") as audio_file:
            client = openai.OpenAI()
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text"
            )
        logging.info(f"Transcription successful for {file_path}")

        # The response is handled both as a bare string and as an object
        # exposing `.text`.
        if isinstance(transcript, str):
            return transcript
        if hasattr(transcript, 'text'):
            return transcript.text
        logging.error(f"Unexpected transcription response format: {type(transcript)}")
        return "Error: Could not extract transcript text from OpenAI response."
    except openai.BadRequestError as e:
        logging.error(f"OpenAI API Bad Request Error (possibly file format/size issue): {e}")
        detail = str(e)
        if "Invalid file format" in detail:
            return "Error: Invalid audio file format. Supported formats include mp3, mp4, mpeg, mpga, m4a, wav, and webm."
        if "maximum file size" in detail:
            return "Error: Audio file exceeds the maximum size limit (25MB) for direct upload."
        # Previously "Error during transcription: ...", which lacked the
        # "Error:" marker and was therefore treated as a valid transcript.
        return f"Error: Transcription request was rejected: {e}"
    except openai.AuthenticationError:
        logging.error("OpenAI API Authentication Error: Check your API key.")
        return "Error: OpenAI API Authentication Failed. Check API Key."
    except Exception as e:
        logging.error(f"An unexpected error occurred during transcription: {e}")
        # Same marker fix as above.
        return "Error: An unexpected error occurred during transcription."
|
|
def generate_minutes_ai(transcript, model_name, session_id):
    """Generate structured meeting minutes from a transcript with the chosen model.

    Args:
        transcript: Transcript text. Empty values, or text containing the
            ``"Error:"`` marker, are rejected.
        model_name: ``'openai'``, ``'gemini'``, ``'anthropic'`` or ``'grok'``.
        session_id: Key into ``session_locks``; that session's lock is held
            for the duration of the request.

    Returns:
        The generated minutes on success. On failure, a message that always
        starts with ``"Error:"``; callers detect failures by that marker.
        API failures are reported through the return value, not raised.
    """
    logging.info(f"Generating minutes using {model_name} for session {session_id}")
    if not transcript or "Error:" in transcript:
        return "Error: Cannot generate minutes from invalid or missing transcript."

    system_prompt = "You are a professional assistant tasked with creating structured meeting minutes, including sections like Attendees, Agenda, Discussion Points, Action Items, and Decisions Made."
    user_prompt = f"Generate detailed meeting minutes from this transcript:\n\n{transcript}"
    # Used by the backends that are called without a separate system message.
    combined_prompt = f"Generate detailed meeting minutes from this transcript, including sections like Attendees, Agenda, Discussion Points, Action Items, and Decisions Made:\n\n{transcript}"
    gemini_model_name = 'gemini-1.5-flash-latest'
    anthropic_model_name = "claude-3-5-haiku-20241022"
    # NOTE(review): this looks like an xAI Grok model id while the endpoint
    # below is Groq's OpenAI-compatible API - confirm the intended provider.
    groq_model_name = "grok-3-mini-fast-beta"

    with session_locks[session_id]:
        try:
            if model_name == 'openai':
                if not openai.api_key:
                    return "Error: OpenAI API key not configured."
                client = openai.OpenAI()
                response = client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ],
                    timeout=120
                )
                logging.info(f"OpenAI minutes generation successful for session {session_id}")
                return response.choices[0].message.content

            if model_name == 'gemini':
                if not genai:
                    return "Error: Google Gemini API not configured or key missing."
                model = genai.GenerativeModel(gemini_model_name)
                response = model.generate_content(
                    combined_prompt,
                    request_options={'timeout': 120}
                )
                logging.info(f"Gemini minutes generation successful for session {session_id}")
                if response.parts:
                    return response.text
                logging.warning(f"Gemini response blocked or empty for session {session_id}. Reason: {response.prompt_feedback}")
                return f"Error: Gemini response blocked or empty. Reason: {response.prompt_feedback}"

            if model_name == 'anthropic':
                if not anthropic:
                    return "Error: Anthropic API not configured or key missing."
                response = anthropic.messages.create(
                    model=anthropic_model_name,
                    max_tokens=2000,
                    messages=[{"role": "user", "content": combined_prompt}],
                    timeout=120
                )
                logging.info(f"Anthropic minutes generation successful for session {session_id}")
                if response.content and isinstance(response.content, list) and hasattr(response.content[0], 'text'):
                    return response.content[0].text
                logging.error(f"Could not extract content from Anthropic response: {response}")
                return "Error: Could not extract content from Anthropic response."

            if model_name == 'grok':
                if not grok_api_key:
                    return "Error: Grok API key (via Groq) not configured."
                groq_url = "https://api.groq.com/openai/v1/chat/completions"
                headers = {
                    "Authorization": f"Bearer {grok_api_key}",
                    "Content-Type": "application/json"
                }
                data = {
                    "model": groq_model_name,
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_prompt}
                    ],
                    "max_tokens": 2000,
                    "temperature": 0.7
                }
                response = requests.post(groq_url, headers=headers, json=data, timeout=120)
                response.raise_for_status()
                logging.info(f"Groq ({groq_model_name}) minutes generation successful for session {session_id}")
                return response.json()["choices"][0]["message"]["content"]

            logging.warning(f"Invalid model selection: {model_name}")
            return "Error: Invalid model selection"
        except requests.exceptions.Timeout:
            logging.error(f"API Request Timeout for {model_name} on session {session_id}")
            return f"Error: Request to {model_name} API timed out."
        except requests.exceptions.RequestException as e:
            logging.error(f"API Request Error for {model_name}: {e}")
            # `is not None` on purpose: a Response is falsy for 4xx/5xx statuses.
            if model_name == 'grok' and e.response is not None:
                status = e.response.status_code
                if status == 429:
                    logging.warning(f"Groq Rate Limit hit for session {session_id}")
                    return "Error: Groq API rate limit exceeded. Please try again later."
                if status == 404:
                    logging.error(f"Model {groq_model_name} not found via Groq API. Status: {status}. Response: {e.response.text}")
                    return f"Error: Model '{groq_model_name}' not found or accessible via Groq API. Please check model availability."
                if status >= 400:
                    logging.error(f"Groq API error. Status: {status}. Response: {e.response.text}")
                    return f"Error: Groq API request failed with status {status}."
            return f"Error: Failed to communicate with {model_name} API: {e}"
        except Exception as e:
            # The Gemini exception classes are resolved here, not in an
            # `except (genai.types...)` clause: `genai` is None when Gemini is
            # not configured, and evaluating that clause would then raise
            # AttributeError out of this function for any other failure.
            gemini_errors = ()
            if genai is not None:
                generation_types = genai.types.generation_types
                gemini_errors = (
                    generation_types.BlockedPromptException,
                    generation_types.StopCandidateException,
                )
            if isinstance(e, gemini_errors):
                logging.error(f"Gemini content generation issue for session {session_id}: {e}")
                return f"Error: Gemini generation failed or was blocked. {e}"

            logging.error(f"Error generating minutes with {model_name} for session {session_id}: {e}", exc_info=True)
            message = str(e)
            if model_name == 'anthropic' and 'Could not find model' in message:
                return f"Error: Anthropic model '{anthropic_model_name}' not found or accessible. Check model name and API key permissions."
            if model_name == 'gemini' and 'model not found' in message.lower():
                return f"Error: Gemini model '{gemini_model_name}' not found or accessible. Check model name and API key permissions."
            return f"Error: An unexpected error occurred while generating minutes using {model_name}."
|
|
def diarize_transcript_ai(transcript, model_name, session_id):
    """Ask the chosen model to label the speakers in an unlabeled transcript.

    Args:
        transcript: Transcript text. Empty values, or text containing the
            ``"Error:"`` marker, are rejected.
        model_name: ``'openai'``, ``'gemini'``, ``'anthropic'`` or ``'grok'``.
        session_id: Key into ``session_locks``; that session's lock is held
            for the duration of the request.

    Returns:
        The speaker-labelled transcript on success. On failure, a message that
        always starts with ``"Error:"``; callers detect failures by that marker.
    """
    logging.info(f"Generating diarized transcript using {model_name} for session {session_id}")
    if not transcript or "Error:" in transcript:
        return "Error: Cannot diarize invalid or missing transcript."

    system_prompt = "You are a professional assistant skilled in speaker diarization and transcript formatting."
    diarization_prompt = (
        "Analyze the given transcript to identify distinct speakers without labeled identifiers. "
        "Create unique speaker embeddings based on individual speech patterns, vocabulary choices, and linguistic styles. "
        "Examine the context and content of each utterance to detect likely speaker changes. "
        "Recognize typical conversation structures and turn-taking behaviors to differentiate between speakers. "
        "Finally, use topic modeling to identify shifts in subject matter and areas of expertise, associating certain topics with specific speakers. "
        "Based on this analysis, assign speaker labels (e.g., Speaker 1, Speaker 2) to each utterance in the transcript."
        "\n\nTranscript:\n" + transcript
    )
    # NOTE(review): the Anthropic and Groq calls cap output at 2000 tokens;
    # a long transcript may come back truncated - confirm this is acceptable.

    with session_locks[session_id]:
        try:
            if model_name == 'openai':
                if not openai.api_key:
                    return "Error: OpenAI API key not configured."
                client = openai.OpenAI()
                response = client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": diarization_prompt}
                    ],
                    timeout=120
                )
                logging.info(f"OpenAI diarization successful for session {session_id}")
                return response.choices[0].message.content

            if model_name == 'gemini':
                if not genai:
                    return "Error: Google Gemini API not configured or key missing."
                model = genai.GenerativeModel('gemini-1.5-flash-latest')
                response = model.generate_content(
                    diarization_prompt,
                    request_options={'timeout': 120}
                )
                logging.info(f"Gemini diarization successful for session {session_id}")
                if response.parts:
                    return response.text
                logging.warning(f"Gemini diarization response blocked or empty for session {session_id}. Reason: {response.prompt_feedback}")
                return f"Error: Gemini response blocked or empty. Reason: {response.prompt_feedback}"

            if model_name == 'anthropic':
                if not anthropic:
                    return "Error: Anthropic API not configured or key missing."
                response = anthropic.messages.create(
                    model="claude-3-5-haiku-20241022",
                    max_tokens=2000,
                    messages=[{"role": "user", "content": diarization_prompt}],
                    timeout=120
                )
                logging.info(f"Anthropic diarization successful for session {session_id}")
                if response.content and isinstance(response.content, list) and hasattr(response.content[0], 'text'):
                    return response.content[0].text
                logging.error(f"Could not extract content from Anthropic diarization response: {response}")
                return "Error: Could not extract content from Anthropic response."

            if model_name == 'grok':
                if not grok_api_key:
                    return "Error: Grok API key (via Groq) not configured."
                groq_url = "https://api.groq.com/openai/v1/chat/completions"
                headers = {
                    "Authorization": f"Bearer {grok_api_key}",
                    "Content-Type": "application/json"
                }
                data = {
                    "model": "grok-3-mini-fast-beta",
                    "messages": [
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": diarization_prompt}
                    ],
                    "max_tokens": 2000,
                    "temperature": 0.7
                }
                response = requests.post(groq_url, headers=headers, json=data, timeout=120)
                response.raise_for_status()
                logging.info(f"Groq ({data['model']}) diarization successful for session {session_id}")
                return response.json()["choices"][0]["message"]["content"]

            logging.warning(f"Invalid model selection for diarization: {model_name}")
            return "Error: Invalid model selection"
        except Exception as e:
            logging.error(f"Error during diarization with {model_name} for session {session_id}: {e}", exc_info=True)
            # Previously "Error generating diarized transcript using ...",
            # which lacked the "Error:" marker, so the failure text was stored
            # and displayed as if it were a diarized transcript.
            return f"Error: An unexpected error occurred while generating the diarized transcript using {model_name}."
|
|
def save_to_word(content, filename):
    """Write ``content`` as a single paragraph into a new Word document.

    Returns ``filename`` when the document was saved, or ``None`` when
    building or saving it failed (the error is logged).
    """
    try:
        document = docx.Document()
        document.add_paragraph(content)
        document.save(filename)
    except Exception as exc:
        logging.error(f"Error saving to Word document {filename}: {exc}")
        return None

    logging.info(f"Saved content to Word document: {filename}")
    return filename
|
|
# Lower-case file extensions (with leading dot) accepted as audio uploads.
ALLOWED_AUDIO_EXTENSIONS = ['.mp3', '.wav', '.m4a', '.webm', '.mp4', '.mpeg', '.mpga']
# Lower-case file extensions (with leading dot) treated as video uploads.
ALLOWED_VIDEO_EXTENSIONS = ['.mp4', '.mov', '.avi', '.webm', '.mkv', '.flv']
# Union of both lists, audio first. '.mp4' and '.webm' occur in both, so the
# concatenation is de-duplicated (order preserved) to keep the user-facing
# "Allowed types" message free of repeated entries.
ALLOWED_UPLOAD_EXTENSIONS = list(dict.fromkeys(ALLOWED_AUDIO_EXTENSIONS + ALLOWED_VIDEO_EXTENSIONS))
|
|
# Page layout: hidden stores and download targets first, then a two-column
# row with the controls card on the left and the output card on the right.
# All inline style dictionaries use camelCase keys, as Dash documents for the
# `style` property; the kebab-case keys previously mixed in were normalised.
app.layout = dbc.Container([
    # Session id persisted in the browser's local storage.
    dcc.Store(id='session-id', storage_type='local'),
    dcc.Store(id='session-state-trigger'),
    dcc.Download(id="download-transcript"),
    dcc.Download(id="download-audio"),
    dcc.Download(id="download-minutes"),
    dcc.Download(id="download-diarized"),
    dbc.Row([
        # ---- Controls column ----
        dbc.Col(dbc.Card(
            dbc.CardBody([
                html.H4("Controls", className="card-title"),
                html.Div("Upload meeting audio or video file:"),
                dcc.Upload(
                    id='audio-uploader',
                    children=html.Div([
                        'Drag and Drop or ',
                        html.A('Select Audio/Video File')
                    ]),
                    style={
                        'width': '100%',
                        'height': '60px',
                        'lineHeight': '60px',
                        'borderWidth': '1px',
                        'borderStyle': 'dashed',
                        'borderRadius': '5px',
                        'textAlign': 'center',
                        'margin': '10px 0'
                    },
                    multiple=False,
                    accept='audio/*,video/*'
                ),
                html.Div(id='upload-status', children='Status: Ready to Upload', className="mt-2"),
                html.H5("Select AI Model", className="mt-4"),
                dcc.Dropdown(
                    id='model-selection',
                    # A backend is selectable only when its key/client was configured.
                    options=[
                        {'label': 'OpenAI GPT-3.5 Turbo', 'value': 'openai', 'disabled': not openai.api_key},
                        {'label': 'Google Gemini 1.5 Flash', 'value': 'gemini', 'disabled': not genai},
                        {'label': 'Anthropic Claude 3.5 Haiku', 'value': 'anthropic', 'disabled': not anthropic},
                        {'label': 'Grok 3 Mini', 'value': 'grok', 'disabled': not grok_api_key}
                    ],
                    # Default to the first configured backend, in the order listed above.
                    value='openai' if openai.api_key else ('gemini' if genai else ('anthropic' if anthropic else ('grok' if grok_api_key else None))),
                    clearable=False,
                    className="mt-2",
                    disabled=not (openai.api_key or genai or anthropic or grok_api_key)
                ),
                dbc.Button("Generate Minutes", id="minutes-btn", color="secondary", className="mt-4 w-100", disabled=True),
                dbc.Button("Diarize Transcript", id="diarize-btn", color="secondary", className="mt-2 w-100", disabled=True),
                html.H5("Downloads", className="mt-4"),
                dbc.Button("Download Transcript (.docx)", id="download-transcript-btn", color="info", className="w-100 mb-2", disabled=True),
                dbc.Button("Download Minutes (.docx)", id="download-minutes-btn", color="info", className="w-100 mb-2", disabled=True),
                dbc.Button("Download Processed Audio", id="download-audio-btn", color="info", className="w-100 mb-2", disabled=True),
                dbc.Button("Download Diarized Transcript (.docx)", id="download-diarized-btn", color="info", className="w-100 mb-2", disabled=True),
                dbc.Button("Delete Session Data", id="delete-btn", color="warning", className="mt-4 w-100", disabled=True),
            ]),
            style={'height': '80vh', 'overflowY': 'auto'}
        ), width=12, lg=4),
        # ---- Output column ----
        dbc.Col(dbc.Card(
            dbc.CardBody([
                dcc.Loading(
                    id="loading",
                    type="default",
                    parent_style={'position': 'relative', 'height': '100%'},
                    style={'position': 'absolute', 'top': '50%', 'left': '50%', 'transform': 'translate(-50%, -50%)', 'zIndex': '1000'},
                    children=[
                        html.Div([
                            html.H4("Output", className="card-title"),
                            html.Div(id="status", children="Status: Idle", className="mb-2"),
                            html.H5("Transcript / Minutes / Diarization"),
                            # pre-wrap keeps the line breaks of the generated text.
                            html.Div(id="transcript-preview", style={
                                "height": "400px",
                                "overflowY": "scroll",
                                "border": "1px solid #ccc",
                                "padding": "10px",
                                "whiteSpace": "pre-wrap",
                                "wordWrap": "break-word",
                                "backgroundColor": "#f9f9f9"
                            }),
                        ])
                    ]
                ),
                # Invisible, zero-height output target written by the actions callback.
                html.Div(id="loading-output", style={"height": "0px", "visibility": "hidden"}),
            ]),
            style={'height': '80vh', 'overflowY': 'auto', 'position': 'relative'}
        ), width=12, lg=8),
    ])
], fluid=True)
|
|
def _is_valid_session_id(candidate):
    """Return True only for a canonical UUID string (the form ``str(uuid.uuid4())`` yields)."""
    if not isinstance(candidate, str):
        return False
    try:
        # uuid.UUID also accepts braces, URN and hyphen-less forms; comparing
        # against the canonical rendering rejects those variants.
        return str(uuid.UUID(candidate)) == candidate
    except ValueError:
        return False


@app.callback(
    Output('session-id', 'data'),
    Input('session-id', 'data'),
    prevent_initial_call=False
)
def manage_session_id(existing_session_id):
    """Resolve the session id for this browser and keep store and cookie in sync.

    The id is taken, in order of preference, from the client-side store on the
    initial call, from the store when it matches the cookie, from the
    ``dash-session-id`` cookie, or is freshly generated. Both the store and
    the cookie are client-controlled, and the id is later used as a dictionary
    key and as part of a temp-directory name, so values that are not canonical
    UUID strings are ignored.

    Args:
        existing_session_id: Current value of the ``session-id`` store.

    Returns:
        The session id to write back into the ``session-id`` store.
    """
    raw_cookie = flask.request.cookies.get('dash-session-id')
    ctx = dash.callback_context

    # Discard anything this function could not have issued itself.
    store_id = existing_session_id if _is_valid_session_id(existing_session_id) else None
    session_cookie = raw_cookie if _is_valid_session_id(raw_cookie) else None

    if store_id and not ctx.triggered:
        final_session_id = store_id
        source = "store (initial)"
    elif store_id and session_cookie == store_id:
        final_session_id = store_id
        source = "store/cookie match"
    elif session_cookie:
        final_session_id = session_cookie
        source = "cookie"
    else:
        final_session_id = str(uuid.uuid4())
        source = "new generation"

    if final_session_id not in session_data:
        logging.info(f"Initializing server-side session for ID: {final_session_id} (Source: {source})")
        get_session_dir(final_session_id)

    # Raw client values are logged with !r so control characters cannot forge log lines.
    logging.info(f"Manage Session ID - Final ID: {final_session_id}, Source: {source}, Store Input: {existing_session_id!r}, Cookie Input: {raw_cookie!r}")

    # Compare against the raw cookie so a missing, stale or invalid cookie is overwritten.
    if source == "new generation" or raw_cookie != final_session_id:
        logging.info(f"Setting session cookie for ID: {final_session_id}")
        ctx.response.set_cookie('dash-session-id', final_session_id, max_age=60*60*24*7)
    return final_session_id
|
|
| @app.callback( |
| [ |
| Output("status", "children"), |
| Output("transcript-preview", "children"), |
| Output("minutes-btn", "disabled"), |
| Output("diarize-btn", "disabled"), |
| Output("download-transcript-btn", "disabled"), |
| Output("download-minutes-btn", "disabled"), |
| Output("download-audio-btn", "disabled"), |
| Output("download-diarized-btn", "disabled"), |
| Output("delete-btn", "disabled"), |
| Output("loading-output", "children"), |
| Output("upload-status", "children") |
| ], |
| [ |
| Input('audio-uploader', 'contents'), |
| Input("minutes-btn", "n_clicks"), |
| Input("diarize-btn", "n_clicks"), |
| Input("delete-btn", "n_clicks") |
| ], |
| [ |
| State("session-id", "data"), |
| State("model-selection", "value"), |
| State("transcript-preview", "children"), |
| State('audio-uploader', 'filename') |
| ], |
| prevent_initial_call=True |
| ) |
| def handle_actions(upload_contents, minutes_clicks, diarize_clicks, delete_clicks, session_id, selected_model, existing_preview, filename): |
| if not session_id: |
| logging.warning("Session ID missing in handle_actions.") |
| return "Status: Error - Session ID missing", "", True, True, True, True, True, True, True, None, "Status: Error" |
| ctx = dash.callback_context |
| triggered_id = ctx.triggered_id if hasattr(ctx, 'triggered_id') else (ctx.triggered[0]['prop_id'].split('.')[0] if ctx.triggered else None) |
| current_transcript = session_data[session_id].get("transcript", "") |
| current_minutes = session_data[session_id].get("minutes", "") |
| current_diarized = session_data[session_id].get("diarized", "") |
| current_audio_path = session_data[session_id].get("audio_path", None) |
| original_filename = session_data[session_id].get("original_filename", None) |
| output_text = "" |
| |
| if current_diarized and "Error:" not in current_diarized: |
| output_text = current_diarized |
| elif current_minutes and "Error:" not in current_minutes: |
| output_text = current_minutes |
| elif current_transcript and "Error:" not in current_transcript: |
| output_text = current_transcript |
| else: |
| output_text = "Upload an audio or video file to begin." |
| status_msg = "Status: Idle" |
| if current_diarized and "Error:" not in current_diarized: |
| status_msg = "Status: Session restored. Diarized transcript loaded." |
| elif current_minutes and "Error:" not in current_minutes: |
| status_msg = "Status: Session restored. Minutes loaded." |
| elif current_transcript and "Error:" not in current_transcript: |
| status_msg = "Status: Session restored. Transcript loaded. Ready for Minutes Generation." |
| elif current_audio_path and os.path.exists(current_audio_path): |
| status_msg = f"Status: Session restored. Processed audio loaded ({os.path.basename(original_filename if original_filename else 'file')}). Ready for transcription/minutes." |
| elif original_filename: |
| status_msg = f"Status: Session restored. Previous upload ({original_filename}) might have had issues." |
| minutes_disabled = not bool(current_transcript and "Error:" not in current_transcript) |
| diarize_disabled = not bool(current_transcript and "Error:" not in current_transcript) |
| dl_transcript_disabled = not bool(current_transcript and "Error:" not in current_transcript) |
| dl_minutes_disabled = not bool(current_minutes and "Error:" not in current_minutes) |
| dl_audio_disabled = not bool(current_audio_path and os.path.exists(current_audio_path)) |
| dl_diarized_disabled = not bool(current_diarized and "Error:" not in current_diarized) |
| delete_disabled = not bool(session_data.get(session_id, {}).get("temp_dir")) |
| loading_output = None |
| upload_status_msg = f"Status: {'Loaded: ' + original_filename if original_filename else 'Ready to Upload'}" |
| start_time = time.time() |
| if triggered_id == 'audio-uploader' and upload_contents is not None and filename is not None: |
| logging.info(f"File uploaded for session {session_id}, filename: {filename}") |
| session_data[session_id]["original_filename"] = filename |
| upload_status_msg = f"Status: Processing Uploaded File ({filename})..." |
| status_msg = "Status: Processing Upload..." |
| loading_output = "Processing Upload..." |
| session_dir = get_session_dir(session_id) |
| _, f_ext = os.path.splitext(filename) |
| f_ext_lower = f_ext.lower() |
| if f_ext_lower not in ALLOWED_UPLOAD_EXTENSIONS: |
| status_msg = f"Status: Error - Invalid file type ({f_ext}). Please upload audio or video." |
| output_text = f"Error: Invalid file type ({f_ext}). Allowed types: {', '.join(ALLOWED_UPLOAD_EXTENSIONS)}" |
| upload_status_msg = f"Status: Invalid File Type ({filename})" |
| session_data[session_id]["audio_path"] = None |
| session_data[session_id]["transcript"] = None |
| session_data[session_id]["minutes"] = None |
| session_data[session_id]["diarized"] = None |
| session_data[session_id]["original_filename"] = None |
| minutes_disabled = True |
| diarize_disabled = True |
| dl_transcript_disabled = True |
| dl_minutes_disabled = True |
| dl_diarized_disabled = True |
| dl_audio_disabled = True |
| delete_disabled = False |
| return status_msg, output_text, minutes_disabled, diarize_disabled, dl_transcript_disabled, dl_minutes_disabled, dl_audio_disabled, dl_diarized_disabled, delete_disabled, None, upload_status_msg |
| safe_upload_filename = f"uploaded_file{f_ext}" |
| upload_file_path = os.path.join(session_dir, safe_upload_filename) |
| saved_upload_path = save_base64_data(upload_contents, upload_file_path) |
| if saved_upload_path: |
| audio_path_for_transcription = None |
| is_video = f_ext_lower in ALLOWED_VIDEO_EXTENSIONS |
| if is_video: |
| status_msg = "Status: Extracting audio from video..." |
| upload_status_msg = "Status: Extracting Audio..." |
| loading_output = "Extracting Audio..." |
| extracted_audio_filename = os.path.join(session_dir, f"extracted_audio_{uuid.uuid4()}.mp3") |
| extracted_audio_path = extract_audio_from_video(saved_upload_path, extracted_audio_filename) |
| if extracted_audio_path: |
| audio_path_for_transcription = extracted_audio_path |
| session_data[session_id]["audio_path"] = extracted_audio_path |
| dl_audio_disabled = False |
| try: |
| os.remove(saved_upload_path) |
| logging.info(f"Removed original video file: {saved_upload_path}") |
| except Exception as e: |
| logging.warning(f"Could not remove original video file {saved_upload_path}: {e}") |
| else: |
| status_msg = "Status: Error - Failed to extract audio from video." |
| output_text = "Error: Failed to extract audio from video file. Check if the file is valid." |
| upload_status_msg = f"Status: Error Extracting Audio ({filename})" |
| session_data[session_id]["audio_path"] = None |
| minutes_disabled = True |
| diarize_disabled = True |
| dl_transcript_disabled = True |
| dl_minutes_disabled = True |
| dl_diarized_disabled = True |
| dl_audio_disabled = True |
| delete_disabled = False |
| return status_msg, output_text, minutes_disabled, diarize_disabled, dl_transcript_disabled, dl_minutes_disabled, dl_audio_disabled, dl_diarized_disabled, delete_disabled, None, upload_status_msg |
| else: |
| audio_path_for_transcription = saved_upload_path |
| session_data[session_id]["audio_path"] = saved_upload_path |
| dl_audio_disabled = False |
| if audio_path_for_transcription: |
| logging.info(f"Audio path set for session {session_id}: {audio_path_for_transcription}. Starting transcription.") |
| status_msg = "Status: Transcribing..." |
| upload_status_msg = f"Status: Transcribing ({filename})..." |
| loading_output = "Transcribing..." |
| transcript_text = transcribe_audio(audio_path_for_transcription) |
| session_data[session_id]["transcript"] = transcript_text |
| session_data[session_id]["minutes"] = None |
| session_data[session_id]["diarized"] = None |
| if "Error:" in transcript_text: |
| status_msg = f"Status: Transcription Failed - {transcript_text}" |
| output_text = transcript_text |
| minutes_disabled = True |
| diarize_disabled = True |
| dl_transcript_disabled = True |
| dl_minutes_disabled = True |
| dl_diarized_disabled = True |
| delete_disabled = False |
| upload_status_msg = f"Status: Transcription Failed. ({filename})" |
| else: |
| status_msg = "Status: Transcription Complete. Ready for Minutes/Diarization." |
| output_text = transcript_text |
| minutes_disabled = False |
| diarize_disabled = False |
| dl_transcript_disabled = False |
| dl_minutes_disabled = True |
| dl_diarized_disabled = True |
| delete_disabled = False |
| upload_status_msg = f"Status: Processed & Transcribed: {filename}" |
| processing_time = time.time() - start_time |
| logging.info(f"File processing and transcription took {processing_time:.2f} seconds for session {session_id}") |
| else: |
| status_msg = "Status: Error - Failed to save uploaded file data." |
| output_text = "Failed to save uploaded file data." |
| upload_status_msg = "Status: Error Saving Upload" |
| session_data[session_id]["audio_path"] = None |
| session_data[session_id]["original_filename"] = None |
| minutes_disabled = True |
| diarize_disabled = True |
| dl_transcript_disabled = True |
| dl_minutes_disabled = True |
| dl_diarized_disabled = True |
| dl_audio_disabled = True |
| delete_disabled = False |
| elif triggered_id == "minutes-btn" and minutes_clicks: |
| logging.info(f"Generate Minutes button clicked for session {session_id}") |
| current_transcript = session_data[session_id].get("transcript", "") |
| if current_transcript and "Error:" not in current_transcript: |
| status_msg = f"Status: Generating Minutes ({selected_model})..." |
| loading_output = "Generating Minutes..." |
| minutes_text = generate_minutes_ai(current_transcript, selected_model, session_id) |
| session_data[session_id]["minutes"] = minutes_text |
| |
| if session_data[session_id].get("diarized") and "Error:" not in session_data[session_id]["diarized"]: |
| output_text = session_data[session_id]["diarized"] |
| else: |
| output_text = minutes_text |
| if "Error:" in minutes_text: |
| status_msg = f"Status: Minutes Generation Failed - {minutes_text}" |
| else: |
| status_msg = "Status: Minutes Generation Complete." |
| processing_time = time.time() - start_time |
| logging.info(f"Minutes generation took {processing_time:.2f} seconds for session {session_id}") |
| minutes_disabled = False |
| diarize_disabled = False |
| dl_transcript_disabled = False |
| dl_audio_disabled = not bool(session_data.get(session_id, {}).get("audio_path") and os.path.exists(session_data.get(session_id, {}).get("audio_path", ""))) |
| dl_minutes_disabled = not (minutes_text and "Error:" not in minutes_text) |
| dl_diarized_disabled = not (session_data[session_id].get("diarized") and "Error:" not in session_data[session_id].get("diarized")) |
| delete_disabled = False |
| upload_status_msg = f"Status: Processed & Transcribed: {session_data[session_id].get('original_filename', 'File')}" |
| else: |
| status_msg = "Status: Cannot generate minutes - No valid transcript available." |
| output_text = existing_preview |
| minutes_disabled = True |
| elif triggered_id == "diarize-btn" and diarize_clicks: |
| logging.info(f"Diarize button clicked for session {session_id}") |
| current_transcript = session_data[session_id].get("transcript", "") |
| if current_transcript and "Error:" not in current_transcript: |
| status_msg = f"Status: Diarizing Transcript ({selected_model})..." |
| loading_output = "Diarizing Transcript..." |
| diarized_text = diarize_transcript_ai(current_transcript, selected_model, session_id) |
| session_data[session_id]["diarized"] = diarized_text |
| if "Error:" in diarized_text: |
| status_msg = f"Status: Diarization Failed - {diarized_text}" |
| else: |
| status_msg = "Status: Diarization Complete." |
| output_text = diarized_text |
| minutes_disabled = False |
| diarize_disabled = False |
| dl_transcript_disabled = False |
| dl_audio_disabled = not bool(session_data.get(session_id, {}).get("audio_path") and os.path.exists(session_data.get(session_id, {}).get("audio_path", ""))) |
| dl_minutes_disabled = not (session_data[session_id].get("minutes") and "Error:" not in session_data[session_id].get("minutes")) |
| dl_diarized_disabled = not (diarized_text and "Error:" not in diarized_text) |
| delete_disabled = False |
| upload_status_msg = f"Status: Processed & Transcribed: {session_data[session_id].get('original_filename', 'File')}" |
| else: |
| status_msg = "Status: Cannot diarize - No valid transcript available." |
| output_text = existing_preview |
| diarize_disabled = True |
| elif triggered_id == "delete-btn" and delete_clicks: |
| logging.info(f"Delete button clicked for session {session_id}") |
| cleanup_session(session_id) |
| status_msg = "Status: All session data deleted." |
| output_text = "Session data cleared. Upload a new file." |
| minutes_disabled = True |
| diarize_disabled = True |
| dl_transcript_disabled = True |
| dl_minutes_disabled = True |
| dl_diarized_disabled = True |
| dl_audio_disabled = True |
| delete_disabled = True |
| upload_status_msg = "Status: Ready to Upload" |
| else: |
| loaded_audio_path = session_data.get(session_id, {}).get("audio_path") |
| loaded_transcript = session_data.get(session_id, {}).get("transcript") |
| loaded_minutes = session_data.get(session_id, {}).get("minutes") |
| loaded_diarized = session_data.get(session_id, {}).get("diarized") |
| temp_dir_exists = bool(session_data.get(session_id, {}).get("temp_dir")) |
| loaded_original_filename = session_data.get(session_id, {}).get("original_filename") |
| dl_audio_disabled = not (loaded_audio_path and os.path.exists(loaded_audio_path)) |
| minutes_disabled = not (loaded_transcript and "Error:" not in loaded_transcript) |
| diarize_disabled = not (loaded_transcript and "Error:" not in loaded_transcript) |
| dl_transcript_disabled = not (loaded_transcript and "Error:" not in loaded_transcript) |
| dl_minutes_disabled = not (loaded_minutes and "Error:" not in loaded_minutes) |
| dl_diarized_disabled = not (loaded_diarized and "Error:" not in loaded_diarized) |
| delete_disabled = not (loaded_audio_path or loaded_transcript or loaded_minutes or loaded_diarized or temp_dir_exists or loaded_original_filename) |
| |
| if loaded_diarized and "Error:" not in loaded_diarized: |
| output_text = loaded_diarized |
| elif loaded_minutes and "Error:" not in loaded_minutes: |
| output_text = loaded_minutes |
| elif loaded_transcript and "Error:" not in loaded_transcript: |
| output_text = loaded_transcript |
| else: |
| output_text = "Upload an audio or video file to begin." |
| if loaded_original_filename and dl_audio_disabled and not loaded_transcript: |
| upload_status_msg = f"Status: Error processing {loaded_original_filename}?" |
| elif loaded_audio_path and os.path.exists(loaded_audio_path): |
| upload_status_msg = f"Status: Processed audio loaded ({loaded_original_filename or 'previous file'})." |
| else: |
| upload_status_msg = "Status: Ready to Upload" |
| return ( |
| status_msg, |
| output_text, |
| minutes_disabled, |
| diarize_disabled, |
| dl_transcript_disabled, |
| dl_minutes_disabled, |
| dl_audio_disabled, |
| dl_diarized_disabled, |
| delete_disabled, |
| loading_output, |
| upload_status_msg |
| ) |
|
|
@app.callback(
    Output("download-transcript", "data"),
    Input("download-transcript-btn", "n_clicks"),
    State("session-id", "data"),
    prevent_initial_call=True,
)
def download_transcript_file(n_clicks, session_id):
    """Serve the session's transcript as a downloadable file.

    The transcript is written to a uniquely named .docx inside the session
    directory and sent under a name derived from the uploaded file. If the
    Word document cannot be created, the raw text is sent as a .txt instead.

    Args:
        n_clicks: Click count of the download button (only used as trigger).
        session_id: Key into the module-level ``session_data`` store.

    Returns:
        A download payload for the ``dcc.Download`` component, or ``None``
        when there is no session, no transcript, or the stored transcript
        contains an "Error:" marker.
    """
    # .get() on the defaultdict avoids creating a record for unknown sessions.
    record = session_data.get(session_id, {}) if session_id else {}
    transcript = record.get("transcript")
    if not transcript:
        logging.warning(f"Download transcript requested but no data found for session {session_id}.")
        return None
    if "Error:" in transcript:
        logging.warning(f"Attempted to download transcript containing an error for session {session_id}.")
        return None

    session_dir = get_session_dir(session_id)
    # A random suffix keeps repeated downloads from overwriting each other.
    transcript_filename = os.path.join(session_dir, f"transcript_{uuid.uuid4()}.docx")
    # save_to_word is expected to return the written path, or a falsy value on failure.
    saved_doc_path = save_to_word(transcript, transcript_filename)
    if not saved_doc_path:
        logging.error(f"Failed to create Word document for transcript download for session {session_id}")
        # send_data_frame only accepts pandas writer methods and raises
        # ValueError for anything else; send_string is the plain-text sender.
        return dcc.send_string(transcript, "meeting_transcript.txt")

    logging.info(f"Sending transcript file: {saved_doc_path}")
    # The session record stores original_filename as None by default, so a
    # .get() default would never apply; fall back explicitly instead.
    source_name = record.get("original_filename") or "meeting"
    original_filename_base = os.path.splitext(source_name)[0]
    download_filename = f"{original_filename_base}_transcript.docx"
    return dcc.send_file(saved_doc_path, filename=download_filename)
|
|
@app.callback(
    Output("download-minutes", "data"),
    Input("download-minutes-btn", "n_clicks"),
    State("session-id", "data"),
    prevent_initial_call=True,
)
def download_minutes_file(n_clicks, session_id):
    """Serve the session's generated meeting minutes as a downloadable file.

    The minutes are written to a uniquely named .docx inside the session
    directory and sent under a name derived from the uploaded file. If the
    Word document cannot be created, the raw text is sent as a .txt instead.

    Args:
        n_clicks: Click count of the download button (only used as trigger).
        session_id: Key into the module-level ``session_data`` store.

    Returns:
        A download payload for the ``dcc.Download`` component, or ``None``
        when there is no session, no minutes, or the stored minutes contain
        an "Error:" marker.
    """
    # .get() on the defaultdict avoids creating a record for unknown sessions.
    record = session_data.get(session_id, {}) if session_id else {}
    minutes = record.get("minutes")
    if not minutes:
        logging.warning(f"Download minutes requested but no data found for session {session_id}.")
        return None
    if "Error:" in minutes:
        logging.warning(f"Attempted to download minutes containing an error for session {session_id}.")
        return None

    session_dir = get_session_dir(session_id)
    # A random suffix keeps repeated downloads from overwriting each other.
    minutes_filename = os.path.join(session_dir, f"meeting_minutes_{uuid.uuid4()}.docx")
    # save_to_word is expected to return the written path, or a falsy value on failure.
    saved_doc_path = save_to_word(minutes, minutes_filename)
    if not saved_doc_path:
        logging.error(f"Failed to create Word document for minutes download for session {session_id}")
        # send_data_frame only accepts pandas writer methods and raises
        # ValueError for anything else; send_string is the plain-text sender.
        return dcc.send_string(minutes, "meeting_minutes.txt")

    logging.info(f"Sending minutes file: {saved_doc_path}")
    # The session record stores original_filename as None by default, so a
    # .get() default would never apply; fall back explicitly instead.
    source_name = record.get("original_filename") or "meeting"
    original_filename_base = os.path.splitext(source_name)[0]
    download_filename = f"{original_filename_base}_minutes.docx"
    return dcc.send_file(saved_doc_path, filename=download_filename)
|
|
@app.callback(
    Output("download-audio", "data"),
    Input("download-audio-btn", "n_clicks"),
    State("session-id", "data"),
    prevent_initial_call=True,
)
def download_audio_file(n_clicks, session_id):
    """Serve the session's processed audio file as a download.

    The file is sent under a name built from the uploaded file's base name
    plus ``_processed_audio`` and the processed file's own extension.

    Args:
        n_clicks: Click count of the download button (only used as trigger).
        session_id: Key into the module-level ``session_data`` store.

    Returns:
        A download payload for the ``dcc.Download`` component, or ``None``
        when there is no session, no recorded audio path, or the recorded
        path no longer exists on disk.
    """
    # .get() on the defaultdict avoids creating a record for unknown sessions.
    record = session_data.get(session_id, {}) if session_id else {}
    audio_path = record.get("audio_path")
    if not audio_path:
        logging.warning(f"Download audio requested but no processed audio path found for session {session_id}.")
        return None
    if not os.path.exists(audio_path):
        logging.error(f"Processed audio file not found at path {audio_path} for session {session_id}")
        return None

    logging.info(f"Sending processed audio file: {audio_path}")
    # The session record stores original_filename as None by default, so a
    # .get() default would never apply; fall back explicitly instead.
    original_filename = record.get("original_filename") or "meeting_audio"
    original_filename_base = os.path.splitext(original_filename)[0]
    _, current_ext = os.path.splitext(audio_path)
    download_filename = f"{original_filename_base}_processed_audio{current_ext}"
    return dcc.send_file(audio_path, filename=download_filename)
|
|
@app.callback(
    Output("download-diarized", "data"),
    Input("download-diarized-btn", "n_clicks"),
    State("session-id", "data"),
    prevent_initial_call=True,
)
def download_diarized_file(n_clicks, session_id):
    """Serve the session's diarized transcript as a downloadable file.

    The diarized text is written to a uniquely named .docx inside the session
    directory and sent under a name derived from the uploaded file. If the
    Word document cannot be created, the raw text is sent as a .txt instead.

    Args:
        n_clicks: Click count of the download button (only used as trigger).
        session_id: Key into the module-level ``session_data`` store.

    Returns:
        A download payload for the ``dcc.Download`` component, or ``None``
        when there is no session, no diarized transcript, or the stored text
        contains an "Error:" marker.
    """
    # .get() on the defaultdict avoids creating a record for unknown sessions.
    record = session_data.get(session_id, {}) if session_id else {}
    diarized = record.get("diarized")
    if not diarized:
        logging.warning(f"Download diarized transcript requested but no data found for session {session_id}.")
        return None
    if "Error:" in diarized:
        logging.warning(f"Attempted to download diarized transcript containing an error for session {session_id}.")
        return None

    session_dir = get_session_dir(session_id)
    # A random suffix keeps repeated downloads from overwriting each other.
    diarized_filename = os.path.join(session_dir, f"diarized_{uuid.uuid4()}.docx")
    # save_to_word is expected to return the written path, or a falsy value on failure.
    saved_doc_path = save_to_word(diarized, diarized_filename)
    if not saved_doc_path:
        logging.error(f"Failed to create Word document for diarized transcript download for session {session_id}")
        # send_data_frame only accepts pandas writer methods and raises
        # ValueError for anything else; send_string is the plain-text sender.
        return dcc.send_string(diarized, "meeting_diarized.txt")

    logging.info(f"Sending diarized transcript file: {saved_doc_path}")
    # The session record stores original_filename as None by default, so a
    # .get() default would never apply; fall back explicitly instead.
    source_name = record.get("original_filename") or "meeting"
    original_filename_base = os.path.splitext(source_name)[0]
    download_filename = f"{original_filename_base}_diarized.docx"
    return dcc.send_file(saved_doc_path, filename=download_filename)
|
|
if __name__ == "__main__":
    # Script entry point: announce startup, serve the Dash app, then report
    # once the server call has returned.
    print("Starting the Dash application...")
    # Listen on every interface at port 7860 with the debugger turned off.
    app.run(
        host="0.0.0.0",
        port=7860,
        debug=False,
    )
    print("Dash application has finished running.")