Commit
·
b9e8b68
1
Parent(s):
f5d1478
:sparkles: Adding audio modality
Browse files
app.py
CHANGED
|
@@ -97,43 +97,39 @@ def load_file_from_response(response):
|
|
| 97 |
return {"type": "image", "data": Image.open(io.BytesIO(content_bytes))}
|
| 98 |
|
| 99 |
elif "audio/" in content_type:
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
"data": {"array": audio_data, "sample_rate": sample_rate},
|
| 104 |
-
}
|
| 105 |
|
| 106 |
elif "application/octet-stream" in content_type:
|
| 107 |
# Try Excel
|
| 108 |
try:
|
| 109 |
excel_data = pd.read_excel(io.BytesIO(content_bytes))
|
| 110 |
return {"type": "excel", "data": excel_data}
|
| 111 |
-
except Exception:
|
| 112 |
-
|
| 113 |
|
| 114 |
# Try image
|
| 115 |
try:
|
| 116 |
img = Image.open(io.BytesIO(content_bytes))
|
| 117 |
return {"type": "image", "data": img}
|
| 118 |
except UnidentifiedImageError:
|
| 119 |
-
|
| 120 |
|
| 121 |
# Try audio
|
| 122 |
try:
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
except RuntimeError:
|
| 129 |
-
pass
|
| 130 |
|
| 131 |
# Try UTF-8 text
|
| 132 |
try:
|
| 133 |
text = content_bytes.decode("utf-8")
|
| 134 |
return {"type": "text", "data": text}
|
| 135 |
except UnicodeDecodeError:
|
| 136 |
-
|
| 137 |
|
| 138 |
return {"type": "binary", "data": content_bytes}
|
| 139 |
|
|
@@ -158,20 +154,6 @@ def load_image(image_path: str) -> str:
|
|
| 158 |
return f"data:image/jpeg;base64,{encoded}"
|
| 159 |
|
| 160 |
|
| 161 |
-
def load_audio(audio_path: str) -> str:
|
| 162 |
-
"""Encodes audio as base64 for GPT-4o (if needed)."""
|
| 163 |
-
with open(audio_path, "rb") as f:
|
| 164 |
-
encoded = base64.b64encode(f.read()).decode("utf-8")
|
| 165 |
-
return f"data:audio/wav;base64,{encoded}"
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
def transcribe_audio(audio_path: str) -> str:
|
| 169 |
-
"""Transcribes audio file using OpenAI Whisper model (whisper-1)."""
|
| 170 |
-
with open(audio_path, "rb") as f:
|
| 171 |
-
transcript = openai.Audio.transcribe("whisper-1", f)
|
| 172 |
-
return transcript.get("text", "")
|
| 173 |
-
|
| 174 |
-
|
| 175 |
def describe_image(image_path: str) -> str:
|
| 176 |
"""Sends image directly to GPT-4o to describe it."""
|
| 177 |
image_base64 = load_image(image_path)
|
|
|
|
| 97 |
return {"type": "image", "data": Image.open(io.BytesIO(content_bytes))}
|
| 98 |
|
| 99 |
elif "audio/" in content_type:
|
| 100 |
+
# Transcribe audio using OpenAI Whisper
|
| 101 |
+
transcript = openai.Audio.transcribe("whisper-1", io.BytesIO(content_bytes))
|
| 102 |
+
return {"type": "text", "data": transcript.get("text", "")}
|
|
|
|
|
|
|
| 103 |
|
| 104 |
elif "application/octet-stream" in content_type:
|
| 105 |
# Try Excel
|
| 106 |
try:
|
| 107 |
excel_data = pd.read_excel(io.BytesIO(content_bytes))
|
| 108 |
return {"type": "excel", "data": excel_data}
|
| 109 |
+
except Exception as e:
|
| 110 |
+
print(f"Error loading excel")
|
| 111 |
|
| 112 |
# Try image
|
| 113 |
try:
|
| 114 |
img = Image.open(io.BytesIO(content_bytes))
|
| 115 |
return {"type": "image", "data": img}
|
| 116 |
except UnidentifiedImageError:
|
| 117 |
+
print(f"Error loading image")
|
| 118 |
|
| 119 |
# Try audio
|
| 120 |
try:
|
| 121 |
+
# Transcribe audio from raw bytes
|
| 122 |
+
transcript = openai.Audio.transcribe(model="whisper-1", file=io.BytesIO(content_bytes))
|
| 123 |
+
return {"type": "text", "data": transcript.get("text", "")}
|
| 124 |
+
except Exception as e:
|
| 125 |
+
print(f"Error transcribing audio")
|
|
|
|
|
|
|
| 126 |
|
| 127 |
# Try UTF-8 text
|
| 128 |
try:
|
| 129 |
text = content_bytes.decode("utf-8")
|
| 130 |
return {"type": "text", "data": text}
|
| 131 |
except UnicodeDecodeError:
|
| 132 |
+
print(f"Error decoding UTF-8")
|
| 133 |
|
| 134 |
return {"type": "binary", "data": content_bytes}
|
| 135 |
|
|
|
|
| 154 |
return f"data:image/jpeg;base64,{encoded}"
|
| 155 |
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
def describe_image(image_path: str) -> str:
|
| 158 |
"""Sends image directly to GPT-4o to describe it."""
|
| 159 |
image_base64 = load_image(image_path)
|