samir72 committed on
Commit
7bb3812
·
1 Parent(s): 9e2fc27

Add app.py and requirements.txt for HF space

Browse files
Files changed (2) hide show
  1. app.py +139 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import tempfile
4
+ import requests
5
+ import gradio as gr
6
+ from dotenv import load_dotenv
7
+ from openai import AzureOpenAI # official OpenAI SDK, works with Azure endpoints
8
+
9
+ # --- LLM call (Azure OpenAI with API key) -----------------------------------
10
+
11
def summarize_audio_b64(
    audio_b64: str,
    sys_prompt: str,
    user_prompt: str,
    audio_format: str = "auto",
) -> str:
    """
    Call Azure OpenAI Chat Completions with a base64-encoded audio clip.

    Configuration is read from the environment after ``load_dotenv()``:
    ``AC_OPENAI_ENDPOINT``, ``AC_OPENAI_API_KEY``, ``AC_MODEL_DEPLOYMENT``
    and ``AC_OPENAI_API_VERSION``.

    Args:
        audio_b64: Base64 text of the audio file contents.
        sys_prompt: System message; a built-in default is used when this is
            empty or whitespace-only.
        user_prompt: Text instruction sent alongside the audio; a built-in
            default is used when this is empty or whitespace-only.
        audio_format: Format label sent with the audio ("mp3" or "wav").
            The default "auto" picks "wav" when the payload starts with a
            RIFF/WAVE header and "mp3" otherwise.

    Returns:
        The model's text reply. Missing configuration and any exception
        raised while talking to the service are reported as a readable
        message string instead of being raised, so the UI can display them.
    """
    load_dotenv()

    endpoint = os.getenv("AC_OPENAI_ENDPOINT")
    api_key = os.getenv("AC_OPENAI_API_KEY")
    deployment = os.getenv("AC_MODEL_DEPLOYMENT")
    # Not part of the guard below; if it is unset, any resulting client error
    # is reported through the except branch.
    api_version = os.getenv("AC_OPENAI_API_VERSION")

    if not endpoint or not api_key or not deployment:
        return "Server misconfiguration: required env vars missing."

    # Strip first, then fall back: a whitespace-only prompt must not reach
    # the model as an empty message.
    system_message = (sys_prompt or "").strip() or (
        "You are an AI assistant with a charter to clearly analyze the customer enquiry."
    )
    user_text = (user_prompt or "").strip() or "Summarize the audio content."

    fmt = (audio_format or "auto").strip().lower()
    if fmt == "auto":
        fmt = _sniff_audio_format(audio_b64)

    try:
        client = AzureOpenAI(
            api_key=api_key,
            api_version=api_version,
            azure_endpoint=endpoint,
        )

        response = client.chat.completions.create(
            model=deployment,
            messages=[
                {"role": "system", "content": system_message},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_text},
                        {
                            "type": "input_audio",
                            "input_audio": {"data": audio_b64, "format": fmt},
                        },
                    ],
                },
            ],
        )

        content = response.choices[0].message.content
        if content is None:
            # Keep the declared ``str`` return type even without a text reply.
            return "The model returned no text content."
        return content

    except Exception as ex:
        # UI boundary: surface the failure as text rather than crashing.
        return f"Error from Azure OpenAI: {ex}"


def _sniff_audio_format(audio_b64: str) -> str:
    """Return "wav" if the payload begins with a RIFF/WAVE header, else "mp3"."""
    try:
        # 16 base64 characters decode to the first 12 bytes of the file.
        header = base64.b64decode(audio_b64[:16])
    except (ValueError, TypeError):
        return "mp3"
    if header[:4] == b"RIFF" and header[8:12] == b"WAVE":
        return "wav"
    return "mp3"
58
+
59
+
60
+ # --- I/O helpers ------------------------------------------------------------
61
+
62
def encode_audio_from_path(path: str) -> str:
    """Read the file at ``path`` in binary mode and return it as base64 text."""
    with open(path, "rb") as audio_file:
        raw_bytes = audio_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
65
+
66
+
67
def download_to_temp_mp3(url: str) -> str:
    """
    Stream ``url`` into a new temporary ``.mp3`` file and return its path.

    The file is created with ``delete=False``, so the caller owns it and is
    responsible for removing it once it is no longer needed.

    Args:
        url: Address to fetch with an HTTP GET (30 second timeout).

    Returns:
        Filesystem path of the downloaded temporary file.

    Raises:
        requests.RequestException: If the request fails or the server
            answers with an error status.
        OSError: If the temporary file cannot be created or written.
    """
    # NOTE(review): ``url`` comes straight from user input and the download
    # size is unbounded; consider host allow-listing and a size cap.
    # The ``with`` closes the streamed response so the connection is released
    # even if the download is abandoned half way.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        try:
            with tmp:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        tmp.write(chunk)
        except BaseException:
            # A partial download is useless to the caller, who never learns
            # the path; remove the file here instead of leaking it.
            try:
                os.remove(tmp.name)
            except OSError:
                pass
            raise
    return tmp.name
75
+
76
+
77
def process_audio(upload_path, record_path, url, sys_prompt, user_prompt):
    """
    Pick one audio source, encode it and return the model's summary text.

    Sources are tried in priority order: uploaded file, recorded file, then
    URL. A file downloaded from the URL is temporary and is removed before
    returning.

    Args:
        upload_path: Path of an uploaded audio file, or a falsy value.
        record_path: Path of a recorded audio file, or a falsy value.
        url: Address of a remote audio file; blank or ``None`` means unused.
        sys_prompt: System prompt forwarded to ``summarize_audio_b64``.
        user_prompt: User prompt forwarded to ``summarize_audio_b64``.

    Returns:
        The summary, or a readable message when no source was given or the
        audio could not be downloaded or read.
    """
    tmp_to_cleanup = []
    try:
        remote_url = url.strip() if url else ""

        if upload_path:
            audio_path = upload_path
        elif record_path:
            audio_path = record_path
        elif remote_url:
            try:
                audio_path = download_to_temp_mp3(remote_url)
            except OSError as ex:
                # requests' RequestException derives from IOError/OSError, so
                # this covers network failures as well as temp-file errors.
                return f"Could not download audio from URL: {ex}"
            tmp_to_cleanup.append(audio_path)
        else:
            return "Please provide an audio file via upload, recording, or URL."

        try:
            audio_b64 = encode_audio_from_path(audio_path)
        except OSError as ex:
            # Report unreadable files as text, matching how the summarizer
            # reports its own failures.
            return f"Could not read audio file: {ex}"

        return summarize_audio_b64(audio_b64, sys_prompt, user_prompt)

    finally:
        for tmp_path in tmp_to_cleanup:
            try:
                os.remove(tmp_path)
            except OSError:
                # Best-effort cleanup: a missing or locked temp file must not
                # mask the result being returned.
                pass
102
+
103
+
104
+ # --- UI ---------------------------------------------------------------------
105
+
106
# The layout is declared at import time; the server itself only starts when
# this file is executed as a script (see the guard at the bottom).
with gr.Blocks(title="Audio Summarizer") as demo:
    gr.Markdown(value="# Audio File Summarizer (Azure OpenAI)")
    gr.Markdown(
        value="Upload a mp3, record audio, or paste a URL. The app sends base64 audio to Azure OpenAI."
    )

    # Three alternative audio sources, side by side.
    with gr.Row():
        with gr.Column():
            upload_audio = gr.Audio(
                label="Upload mp3",
                type="filepath",
                sources=["upload"],
            )
        with gr.Column():
            record_audio = gr.Audio(
                label="Record Audio",
                type="filepath",
                sources=["microphone"],
            )
        with gr.Column():
            url_input = gr.Textbox(
                placeholder="https://example.com/audio.mp3",
                label="mp3 URL",
            )

    # Editable prompts, pre-filled with defaults.
    with gr.Row():
        userprompt_input = gr.Textbox(
            placeholder="e.g., Extract key points and action items",
            value="Summarize the audio content",
            label="User Prompt",
        )
        sysprompt_input = gr.Textbox(
            value="You are an AI assistant with a listening charter to clearly analyze the customer enquiry.",
            label="System Prompt",
        )

    submit_btn = gr.Button(value="Summarize")
    output = gr.Textbox(lines=12, label="Summary")

    # Input order must match process_audio's parameters:
    # (upload_path, record_path, url, sys_prompt, user_prompt).
    submit_btn.click(
        process_audio,
        inputs=[
            upload_audio,
            record_audio,
            url_input,
            sysprompt_input,
            userprompt_input,
        ],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-dotenv==1.1.1 gradio==5.45.0 requests==2.32.5 azure-identity==1.25.0 azure-ai-projects==1.0.0 numpy==1.26.4 openai==1.107.3