# NOTE: "Spaces: Running" banner below is HuggingFace Space page residue from a
# web scrape, kept here as a comment so the module remains valid Python.
| import os | |
| import base64 | |
| import tempfile | |
| import requests | |
| from datetime import datetime | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
| from openai import AzureOpenAI # official OpenAI SDK, works with Azure endpoints | |
| import json | |
| import subprocess # to execute youtube-dl version | |
| import Youtubetranscription_summarizer | |
# --- LLM call (Azure OpenAI with API key) -----------------------------------
def summarize_audio_b64(audio_b64: str, sys_prompt: str, user_prompt: str) -> str:
    """
    Summarize base64-encoded audio via Azure OpenAI Chat Completions.

    Args:
        audio_b64: Base64-encoded audio payload. Sent with format "wav";
            NOTE(review): the URL helper downloads .mp3 files — confirm the
            deployment accepts the declared format.
        sys_prompt: System prompt; a built-in default is used when empty/None.
        user_prompt: User prompt; a built-in default is used when empty/None.

    Returns:
        The model's reply text, or a human-readable error string on failure.
        This function never raises; the Gradio UI displays whatever string
        comes back.
    """
    load_dotenv()
    endpoint = os.getenv("AC_OPENAI_ENDPOINT")
    api_key = os.getenv("AC_OPENAI_API_KEY")
    deployment = os.getenv("AC_MODEL_DEPLOYMENT")
    api_version = os.getenv("AC_OPENAI_API_VERSION")
    if not endpoint or not api_key or not deployment:
        return "Server misconfiguration: required env vars missing."
    try:
        client = AzureOpenAI(
            api_key=api_key,
            api_version=api_version,
            azure_endpoint=endpoint,
        )
        system_message = sys_prompt.strip() if sys_prompt else (
            "You are an AI assistant with a charter to clearly analyze the customer enquiry."
        )
        user_text = user_prompt.strip() if user_prompt else "Summarize the audio content."
        response = client.chat.completions.create(
            model=deployment,
            messages=[
                {"role": "system", "content": system_message},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_text},
                        {
                            "type": "input_audio",
                            "input_audio": {"data": audio_b64, "format": "wav"},
                        },
                    ],
                },
            ],
        )
        # Log the effective prompt (user_text), not the raw argument: the raw
        # user_prompt may be None, and len(None) would raise inside the log line.
        print(f"Azure API call at {datetime.now()}: prompt_length={len(user_text)}, audio_size={len(audio_b64)}")
        return response.choices[0].message.content
    except Exception as ex:
        # Bug fix: the original `return print(...)` returned None, so the UI
        # textbox showed nothing on failure. Return the error text instead.
        error_message = f"Error from Azure OpenAI: {ex}"
        print(error_message)
        return error_message
#----Retrieve meta data from metadata.json file------------------------------
def retrieve_file_path(file_name):
    """Return the absolute path of *file_name* located next to this script.

    Returns None when the path does not exist (a message is printed) or when
    it exists but is not a regular file (e.g. a directory).
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    candidate = os.path.join(script_dir, file_name)
    if os.path.isfile(candidate):
        return candidate
    if not os.path.exists(candidate):
        print(f"'{candidate}' does not exist.")
    return None
def retrieve_json_record(file_path, record_id):
    """Load JSON from *file_path* and return the record whose metadata.id
    equals *record_id*, or None when no record matches.

    The file may contain either a single record object or a list of records.
    """
    with open(file_path, 'r') as handle:
        payload = json.load(handle)
    # Normalize both accepted shapes to a list, then scan for the id.
    if isinstance(payload, dict):
        candidates = [payload]
    elif isinstance(payload, list):
        candidates = payload
    else:
        candidates = []
    for entry in candidates:
        if entry.get('metadata', {}).get('id') == record_id:
            return entry
    return None
# --- I/O helpers ------------------------------------------------------------
def encode_audio_from_path(path: str) -> str:
    """Read the file at *path* and return its bytes base64-encoded as ASCII text."""
    with open(path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
def download_to_temp_mp3(url: str) -> str:
    """Stream *url* into a temporary ".mp3" file and return the temp file path.

    Raises requests.HTTPError for non-2xx responses. The caller owns the temp
    file (delete=False) and is responsible for removing it.
    """
    response = requests.get(url, stream=True, timeout=30)
    response.raise_for_status()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as out_file:
        for piece in response.iter_content(chunk_size=8192):
            if piece:  # skip keep-alive chunks
                out_file.write(piece)
    return out_file.name
def process_audio(upload_path, record_path, url, sys_prompt, user_prompt):
    """
    Gradio submit handler: resolve the audio source, encode it, and summarize.

    Source precedence: uploaded file > microphone recording > URL (the URL is
    handed to Youtubetranscription_summarizer.main, whose downloaded file we
    own and delete afterwards).

    Args:
        upload_path: Filepath from the upload widget, or None.
        record_path: Filepath from the microphone widget, or None.
        url: URL string, possibly empty/None.
        sys_prompt: System prompt forwarded to the LLM call.
        user_prompt: User prompt forwarded to the LLM call.

    Returns:
        The summary text, or a human-readable error/instruction string.
        Never raises (errors are returned so the UI can display them).
    """
    tmp_to_cleanup = []
    audio_path = None
    try:
        if upload_path:
            audio_path = upload_path
        elif record_path:
            audio_path = record_path
        elif url and url.strip():
            audio_path = Youtubetranscription_summarizer.main(url.strip())
            tmp_to_cleanup.append(audio_path)
        if not audio_path:
            return "Please provide an audio file via upload, recording, or URL."
        audio_b64 = encode_audio_from_path(audio_path)
        return summarize_audio_b64(audio_b64, sys_prompt, user_prompt)
    except Exception as e:
        # Bug fix: the original `return print(...)` returned None, leaving the
        # UI textbox blank on error. Build the message, log it, and return it.
        prompt_length = len(user_prompt) if user_prompt else 0
        message = (
            f"Error processing audio at {datetime.now()}: "
            f"prompt_length={prompt_length}, audio_path={audio_path}: {e}"
        )
        print(message)
        return message
    finally:
        # Best-effort cleanup of files we created; never mask the real result.
        for temp_path in tmp_to_cleanup:
            try:
                if temp_path and os.path.exists(temp_path):
                    os.remove(temp_path)
            except OSError:
                pass
# --- UI ---------------------------------------------------------------------
with gr.Blocks(title="Audio Summarizer") as demo:
    gr.Markdown("# Audio File Summarizer (Azure OpenAI)")
    gr.Markdown("Upload a mp3, record audio, or paste a URL. The app sends base64 audio to Azure OpenAI.")
    with gr.Row():
        with gr.Column():
            upload_audio = gr.Audio(sources=["upload"], type="filepath", label="Upload mp3")
        with gr.Column():
            record_audio = gr.Audio(sources=["microphone"], type="filepath", label="Record Audio")
        with gr.Column():
            url_input = gr.Textbox(label="mp3 URL", placeholder="https://example.com/audio.mp3")

    ### Get system and user prompts from metadata.json file
    file_name = 'metadata.json'
    record_id = '1'
    file_path = retrieve_file_path(file_name)
    # Bug fix: the original called retrieve_json_record(None, ...) when the
    # metadata file was missing (open(None) -> TypeError) and then indexed
    # jsonrecord even after printing "Record not found." (None subscript
    # -> TypeError). Guard both and fall back to the built-in defaults that
    # the LLM call itself uses.
    jsonrecord = retrieve_json_record(file_path, record_id) if file_path else None
    if jsonrecord:
        print(json.dumps(jsonrecord, indent=2))
        sysprompt_default = jsonrecord['metadata']['content']['system_prompt']['content']
        userprompt_default = jsonrecord['metadata']['content']['user_prompt']['content']
    else:
        print("Record not found.")
        sysprompt_default = "You are an AI assistant with a charter to clearly analyze the customer enquiry."
        userprompt_default = "Summarize the audio content"

    with gr.Row():
        userprompt_input = gr.Textbox(
            label="User Prompt",
            value=userprompt_default,
            placeholder="e.g., Extract key points and action items",
        )
        sysprompt_input = gr.Textbox(
            label="System Prompt",
            value=sysprompt_default,
        )
    submit_btn = gr.Button("Summarize")
    output = gr.Textbox(label="Summary", lines=12)

    # Log which input the user touched (server-side console only).
    upload_audio.change(
        fn=lambda x: print(f"Upload audio selected: {x}"),
        inputs=[upload_audio],
        outputs=[],
    )
    record_audio.change(
        fn=lambda x: print(f"Record audio selected: {x}"),
        inputs=[record_audio],
        outputs=[],
    )
    url_input.change(
        fn=lambda x: print(f"URL input changed: {x}"),
        inputs=[url_input],
        outputs=[],
    )

    submit_btn.click(
        fn=process_audio,
        inputs=[upload_audio, record_audio, url_input, sysprompt_input, userprompt_input],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()