Update app.py
Browse files
app.py
CHANGED
|
@@ -23,6 +23,7 @@ MIME_EXT = {
|
|
| 23 |
}
|
| 24 |
|
| 25 |
def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
|
|
|
|
| 26 |
print(f" โ [_dataurl_to_file] ้ๅง่็ data URL...")
|
| 27 |
try:
|
| 28 |
header, b64 = data_url.split(",", 1)
|
|
@@ -75,6 +76,7 @@ def _extract_effective_path(file_obj) -> str:
|
|
| 75 |
|
| 76 |
# ====== ๅๆฎต่็ ======
|
| 77 |
def split_audio(path):
|
|
|
|
| 78 |
size = os.path.getsize(path)
|
| 79 |
print(f"[split_audio] ๆชๆกๅคงๅฐ: {size} bytes ({size/1024/1024:.2f} MB)")
|
| 80 |
if size <= MAX_SIZE:
|
|
@@ -94,12 +96,14 @@ def split_audio(path):
|
|
| 94 |
|
| 95 |
# ====== ่ฝ้ๆ ธๅฟ ======
|
| 96 |
def transcribe_core(path, model="whisper-1"):
|
|
|
|
| 97 |
print(f"\n{'='*60}")
|
| 98 |
print(f"[transcribe_core] ้ๅง่ฝ้: {path}")
|
| 99 |
print(f"{'='*60}")
|
| 100 |
|
| 101 |
start_time = time.time()
|
| 102 |
|
|
|
|
| 103 |
if path.lower().endswith(".mp4"):
|
| 104 |
fixed = path[:-4] + ".m4a"
|
| 105 |
try:
|
|
@@ -108,6 +112,7 @@ def transcribe_core(path, model="whisper-1"):
|
|
| 108 |
except:
|
| 109 |
pass
|
| 110 |
|
|
|
|
| 111 |
chunks = split_audio(path)
|
| 112 |
print(f"\n[transcribe_core] === Whisper ่ฝ้ ({len(chunks)} ็ๆฎต) ===")
|
| 113 |
raw = []
|
|
@@ -123,6 +128,7 @@ def transcribe_core(path, model="whisper-1"):
|
|
| 123 |
raw_txt = "\n".join(raw)
|
| 124 |
print(f"[transcribe_core] ๅๅง่ฝ้: {len(raw_txt)} ๅญๅ
")
|
| 125 |
|
|
|
|
| 126 |
print(f"\n[transcribe_core] === ็ฐก่ฝ็น ===")
|
| 127 |
conv = client.chat.completions.create(
|
| 128 |
model="gpt-4o-mini",
|
|
@@ -135,6 +141,7 @@ def transcribe_core(path, model="whisper-1"):
|
|
| 135 |
trad = conv.choices[0].message.content.strip()
|
| 136 |
print(f"[transcribe_core] โ
็น้ซ่ฝๆๅฎๆ: {len(trad)} ๅญๅ
")
|
| 137 |
|
|
|
|
| 138 |
print(f"\n[transcribe_core] === AI ๆ่ฆ ===")
|
| 139 |
summ = client.chat.completions.create(
|
| 140 |
model="gpt-4o-mini",
|
|
@@ -155,19 +162,38 @@ def transcribe_core(path, model="whisper-1"):
|
|
| 155 |
|
| 156 |
# ====== Gradio UI ๅฝๅผ ======
|
| 157 |
def transcribe_ui(password, file):
|
|
|
|
| 158 |
print(f"\n๐ [UI] ็ถฒ้ ็่ซๆฑ")
|
| 159 |
if not password or password.strip() != PASSWORD:
|
| 160 |
-
return "โ
|
| 161 |
if not file:
|
| 162 |
-
return "โ ๏ธ No file
|
| 163 |
try:
|
|
|
|
|
|
|
|
|
|
| 164 |
path = _extract_effective_path(file)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
text, summary = transcribe_core(path)
|
| 166 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
except Exception as e:
|
| 168 |
import traceback
|
| 169 |
-
|
| 170 |
-
|
|
|
|
| 171 |
|
| 172 |
# ====== ๅปบ็ซ FastAPI ๆ็จ ======
|
| 173 |
fastapi_app = FastAPI()
|
|
@@ -252,119 +278,530 @@ async def api_transcribe_sync(request: Request):
|
|
| 252 |
content={"status": "error", "error": str(e)}
|
| 253 |
)
|
| 254 |
|
| 255 |
-
# ======
|
| 256 |
-
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
with gr.Column(scale=2):
|
| 267 |
-
status_ui = gr.Textbox(label="Status", interactive=False)
|
| 268 |
-
transcript_ui = gr.Textbox(label="Transcription Result", lines=10)
|
| 269 |
-
summary_ui = gr.Textbox(label="AI Summary", lines=6)
|
| 270 |
-
|
| 271 |
-
btn_ui.click(transcribe_ui, [pw_ui, file_ui], [status_ui, transcript_ui, summary_ui])
|
| 272 |
|
| 273 |
-
with gr.
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
"
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
}
|
| 24 |
|
| 25 |
def _dataurl_to_file(data_url: str, orig_name: str | None = None) -> str:
|
| 26 |
+
"""ๅฐ data URL ่ฝๆ็บๆฌๅฐๆชๆก"""
|
| 27 |
print(f" โ [_dataurl_to_file] ้ๅง่็ data URL...")
|
| 28 |
try:
|
| 29 |
header, b64 = data_url.split(",", 1)
|
|
|
|
| 76 |
|
| 77 |
# ====== ๅๆฎต่็ ======
|
| 78 |
def split_audio(path):
|
| 79 |
+
"""ๅฐ้ณ่จๆชๆกๅๅฒๆๅคๅๅฐๆผ 25MB ็็ๆฎต"""
|
| 80 |
size = os.path.getsize(path)
|
| 81 |
print(f"[split_audio] ๆชๆกๅคงๅฐ: {size} bytes ({size/1024/1024:.2f} MB)")
|
| 82 |
if size <= MAX_SIZE:
|
|
|
|
| 96 |
|
| 97 |
# ====== ่ฝ้ๆ ธๅฟ ======
|
| 98 |
def transcribe_core(path, model="whisper-1"):
|
| 99 |
+
"""ไฝฟ็จ Whisper ้ฒ่ก่ช้ณ่ฝ้๏ผไธฆไฝฟ็จ GPT ้ฒ่ก็น็ฐก่ฝๆๅๆ่ฆ"""
|
| 100 |
print(f"\n{'='*60}")
|
| 101 |
print(f"[transcribe_core] ้ๅง่ฝ้: {path}")
|
| 102 |
print(f"{'='*60}")
|
| 103 |
|
| 104 |
start_time = time.time()
|
| 105 |
|
| 106 |
+
# ่็ MP4 ๆ ผๅผ
|
| 107 |
if path.lower().endswith(".mp4"):
|
| 108 |
fixed = path[:-4] + ".m4a"
|
| 109 |
try:
|
|
|
|
| 112 |
except:
|
| 113 |
pass
|
| 114 |
|
| 115 |
+
# ๅๅฒ้ณ่จ
|
| 116 |
chunks = split_audio(path)
|
| 117 |
print(f"\n[transcribe_core] === Whisper ่ฝ้ ({len(chunks)} ็ๆฎต) ===")
|
| 118 |
raw = []
|
|
|
|
| 128 |
raw_txt = "\n".join(raw)
|
| 129 |
print(f"[transcribe_core] ๅๅง่ฝ้: {len(raw_txt)} ๅญๅ
")
|
| 130 |
|
| 131 |
+
# ็ฐก่ฝ็น
|
| 132 |
print(f"\n[transcribe_core] === ็ฐก่ฝ็น ===")
|
| 133 |
conv = client.chat.completions.create(
|
| 134 |
model="gpt-4o-mini",
|
|
|
|
| 141 |
trad = conv.choices[0].message.content.strip()
|
| 142 |
print(f"[transcribe_core] โ
็น้ซ่ฝๆๅฎๆ: {len(trad)} ๅญๅ
")
|
| 143 |
|
| 144 |
+
# AI ๆ่ฆ
|
| 145 |
print(f"\n[transcribe_core] === AI ๆ่ฆ ===")
|
| 146 |
summ = client.chat.completions.create(
|
| 147 |
model="gpt-4o-mini",
|
|
|
|
| 162 |
|
| 163 |
# ====== Gradio UI ๅฝๅผ ======
|
| 164 |
def transcribe_ui(password, file):
|
| 165 |
+
"""็ถฒ้ ็้ข็่ฝ้่็ๅฝๅผ"""
|
| 166 |
print(f"\n๐ [UI] ็ถฒ้ ็่ซๆฑ")
|
| 167 |
if not password or password.strip() != PASSWORD:
|
| 168 |
+
return "๐ Authentication", "โ Incorrect password. Please check and try again.", "", ""
|
| 169 |
if not file:
|
| 170 |
+
return "โ ๏ธ No File", "Please upload an audio file first.", "", ""
|
| 171 |
try:
|
| 172 |
+
# ๆดๆฐ็ๆ
็บ่็ไธญ
|
| 173 |
+
yield "โณ Processing", "๐ต Audio file received, starting transcription...", "", ""
|
| 174 |
+
|
| 175 |
path = _extract_effective_path(file)
|
| 176 |
+
|
| 177 |
+
# ็ฒๅๆไปถไฟกๆฏ
|
| 178 |
+
file_size = os.path.getsize(path)
|
| 179 |
+
file_size_mb = file_size / 1024 / 1024
|
| 180 |
+
|
| 181 |
+
yield "๐ฏ Transcribing", f"๐ File size: {file_size_mb:.2f} MB\n๐ Processing with Whisper AI...", "", ""
|
| 182 |
+
|
| 183 |
text, summary = transcribe_core(path)
|
| 184 |
+
|
| 185 |
+
# ่จ็ฎๅญๆธ
|
| 186 |
+
char_count = len(text)
|
| 187 |
+
word_estimate = char_count // 2 # ไธญๆไผฐ็ฎ
|
| 188 |
+
|
| 189 |
+
status_msg = f"โ
Transcription Complete\n๐ {char_count} characters ({word_estimate} words approx.)"
|
| 190 |
+
|
| 191 |
+
return "โ
Success", status_msg, text, summary
|
| 192 |
except Exception as e:
|
| 193 |
import traceback
|
| 194 |
+
error_trace = traceback.format_exc()
|
| 195 |
+
print(f"โ [UI] ้ฏ่ชค:\n{error_trace}")
|
| 196 |
+
return "โ Error", f"An error occurred during processing:\n{str(e)}", "", ""
|
| 197 |
|
| 198 |
# ====== ๅปบ็ซ FastAPI ๆ็จ ======
|
| 199 |
fastapi_app = FastAPI()
|
|
|
|
| 278 |
content={"status": "error", "error": str(e)}
|
| 279 |
)
|
| 280 |
|
| 281 |
+
# ====== ่ชๅฎ็พฉ CSS ======
|
| 282 |
+
custom_css = """
|
| 283 |
+
/* ๅ
จๅฑๆจฃๅผ */
|
| 284 |
+
.gradio-container {
|
| 285 |
+
max-width: 1400px !important;
|
| 286 |
+
margin: auto !important;
|
| 287 |
+
}
|
| 288 |
+
|
| 289 |
+
/* ๆจ้กๅๅ */
|
| 290 |
+
.main-title {
|
| 291 |
+
text-align: center;
|
| 292 |
+
padding: 2rem 0;
|
| 293 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 294 |
+
border-radius: 15px;
|
| 295 |
+
margin-bottom: 2rem;
|
| 296 |
+
color: white;
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
.main-title h1 {
|
| 300 |
+
font-size: 2.5rem;
|
| 301 |
+
margin-bottom: 0.5rem;
|
| 302 |
+
font-weight: 700;
|
| 303 |
+
}
|
| 304 |
+
|
| 305 |
+
.main-title p {
|
| 306 |
+
font-size: 1.1rem;
|
| 307 |
+
opacity: 0.9;
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
/* ๅก็ๆจฃๅผ */
|
| 311 |
+
.upload-card, .result-card {
|
| 312 |
+
background: white;
|
| 313 |
+
border-radius: 12px;
|
| 314 |
+
padding: 1.5rem;
|
| 315 |
+
box-shadow: 0 4px 6px rgba(0,0,0,0.07);
|
| 316 |
+
margin-bottom: 1.5rem;
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
/* ๆ้ๆจฃๅผ */
|
| 320 |
+
.custom-button {
|
| 321 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
|
| 322 |
+
border: none !important;
|
| 323 |
+
color: white !important;
|
| 324 |
+
font-weight: 600 !important;
|
| 325 |
+
padding: 0.75rem 2rem !important;
|
| 326 |
+
font-size: 1.1rem !important;
|
| 327 |
+
border-radius: 8px !important;
|
| 328 |
+
transition: transform 0.2s !important;
|
| 329 |
+
}
|
| 330 |
+
|
| 331 |
+
.custom-button:hover {
|
| 332 |
+
transform: translateY(-2px) !important;
|
| 333 |
+
box-shadow: 0 6px 12px rgba(102, 126, 234, 0.4) !important;
|
| 334 |
+
}
|
| 335 |
+
|
| 336 |
+
/* ็ๆ
ๆจ็ฑค */
|
| 337 |
+
.status-badge {
|
| 338 |
+
display: inline-block;
|
| 339 |
+
padding: 0.5rem 1rem;
|
| 340 |
+
border-radius: 20px;
|
| 341 |
+
font-weight: 600;
|
| 342 |
+
margin-bottom: 0.5rem;
|
| 343 |
+
}
|
| 344 |
+
|
| 345 |
+
.status-success { background: #10b981; color: white; }
|
| 346 |
+
.status-processing { background: #3b82f6; color: white; }
|
| 347 |
+
.status-error { background: #ef4444; color: white; }
|
| 348 |
+
.status-warning { background: #f59e0b; color: white; }
|
| 349 |
+
|
| 350 |
+
/* ๆๅญๅๅ */
|
| 351 |
+
textarea {
|
| 352 |
+
border: 2px solid #e5e7eb !important;
|
| 353 |
+
border-radius: 8px !important;
|
| 354 |
+
font-size: 0.95rem !important;
|
| 355 |
+
line-height: 1.6 !important;
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
/* ๆชๆกไธๅณๅๅ */
|
| 359 |
+
.file-upload {
|
| 360 |
+
border: 2px dashed #d1d5db !important;
|
| 361 |
+
border-radius: 12px !important;
|
| 362 |
+
padding: 2rem !important;
|
| 363 |
+
text-align: center !important;
|
| 364 |
+
transition: all 0.3s !important;
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
.file-upload:hover {
|
| 368 |
+
border-color: #667eea !important;
|
| 369 |
+
background: #f9fafb !important;
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
/* ่ณ่จๅก็ */
|
| 373 |
+
.info-card {
|
| 374 |
+
background: #f0f9ff;
|
| 375 |
+
border-left: 4px solid #3b82f6;
|
| 376 |
+
padding: 1rem;
|
| 377 |
+
border-radius: 8px;
|
| 378 |
+
margin: 1rem 0;
|
| 379 |
+
}
|
| 380 |
+
|
| 381 |
+
/* Tab ๆจฃๅผ */
|
| 382 |
+
.tab-nav button {
|
| 383 |
+
font-size: 1.05rem !important;
|
| 384 |
+
font-weight: 600 !important;
|
| 385 |
+
padding: 0.75rem 1.5rem !important;
|
| 386 |
+
}
|
| 387 |
+
|
| 388 |
+
.tab-nav button.selected {
|
| 389 |
+
border-bottom: 3px solid #667eea !important;
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
/* ็จๅผ็ขผๅๅก */
|
| 393 |
+
pre {
|
| 394 |
+
background: #1f2937 !important;
|
| 395 |
+
color: #f3f4f6 !important;
|
| 396 |
+
padding: 1rem !important;
|
| 397 |
+
border-radius: 8px !important;
|
| 398 |
+
overflow-x: auto !important;
|
| 399 |
+
font-size: 0.9rem !important;
|
| 400 |
+
}
|
| 401 |
+
|
| 402 |
+
code {
|
| 403 |
+
background: #1f2937 !important;
|
| 404 |
+
color: #f3f4f6 !important;
|
| 405 |
+
padding: 0.2rem 0.4rem !important;
|
| 406 |
+
border-radius: 4px !important;
|
| 407 |
+
font-family: 'Monaco', 'Menlo', monospace !important;
|
| 408 |
+
}
|
| 409 |
+
|
| 410 |
+
/* ๅ่ฝๅ่กจ */
|
| 411 |
+
.feature-list {
|
| 412 |
+
display: grid;
|
| 413 |
+
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
| 414 |
+
gap: 1rem;
|
| 415 |
+
margin: 1.5rem 0;
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
.feature-item {
|
| 419 |
+
background: white;
|
| 420 |
+
padding: 1.25rem;
|
| 421 |
+
border-radius: 10px;
|
| 422 |
+
border: 1px solid #e5e7eb;
|
| 423 |
+
transition: all 0.3s;
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
.feature-item:hover {
|
| 427 |
+
transform: translateY(-4px);
|
| 428 |
+
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
.feature-icon {
|
| 432 |
+
font-size: 2rem;
|
| 433 |
+
margin-bottom: 0.5rem;
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
/* ้ฟๆๅผ่จญ่จ */
|
| 437 |
+
@media (max-width: 768px) {
|
| 438 |
+
.main-title h1 { font-size: 1.8rem; }
|
| 439 |
+
.main-title p { font-size: 1rem; }
|
| 440 |
+
}
|
| 441 |
+
"""
|
| 442 |
+
|
| 443 |
+
# ====== ๅปบ็ซ Gradio ไป้ข ======
|
| 444 |
+
with gr.Blocks(css=custom_css, theme=gr.themes.Soft(), title="Audio Transcription Service") as demo:
|
| 445 |
|
| 446 |
+
# ไธปๆจ้ก
|
| 447 |
+
gr.HTML("""
|
| 448 |
+
<div class="main-title">
|
| 449 |
+
<h1>๐ง Audio Transcription Service</h1>
|
| 450 |
+
<p>AI-Powered Speech-to-Text with Smart Summarization</p>
|
| 451 |
+
</div>
|
| 452 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 453 |
|
| 454 |
+
with gr.Tabs() as tabs:
|
| 455 |
+
# ====== Tab 1: Web Upload ======
|
| 456 |
+
with gr.Tab("๐ Web Interface", id="upload"):
|
| 457 |
+
gr.Markdown("### Upload and transcribe audio files directly from your browser")
|
| 458 |
+
|
| 459 |
+
with gr.Row():
|
| 460 |
+
# ๅทฆๅด๏ผไธๅณๅๅ
|
| 461 |
+
with gr.Column(scale=1):
|
| 462 |
+
gr.HTML('<div class="upload-card">')
|
| 463 |
+
gr.Markdown("#### ๐ Authentication")
|
| 464 |
+
pw_ui = gr.Textbox(
|
| 465 |
+
label="Password",
|
| 466 |
+
type="password",
|
| 467 |
+
placeholder="Enter your password...",
|
| 468 |
+
show_label=False
|
| 469 |
+
)
|
| 470 |
+
|
| 471 |
+
gr.Markdown("#### ๐ Upload Audio File")
|
| 472 |
+
file_ui = gr.File(
|
| 473 |
+
label="",
|
| 474 |
+
file_types=["audio", ".mp4"],
|
| 475 |
+
file_count="single",
|
| 476 |
+
show_label=False
|
| 477 |
+
)
|
| 478 |
+
|
| 479 |
+
gr.Markdown("""
|
| 480 |
+
<div class="info-card">
|
| 481 |
+
<strong>๐ก Supported Formats:</strong><br>
|
| 482 |
+
MP3, M4A, WAV, OGG, WEBM, MP4
|
| 483 |
+
</div>
|
| 484 |
+
""")
|
| 485 |
+
|
| 486 |
+
btn_ui = gr.Button(
|
| 487 |
+
"๐ Start Transcription",
|
| 488 |
+
variant="primary",
|
| 489 |
+
size="lg",
|
| 490 |
+
elem_classes="custom-button"
|
| 491 |
+
)
|
| 492 |
+
gr.HTML('</div>')
|
| 493 |
+
|
| 494 |
+
# ๅณๅด๏ผ็ตๆๅๅ
|
| 495 |
+
with gr.Column(scale=2):
|
| 496 |
+
gr.HTML('<div class="result-card">')
|
| 497 |
+
gr.Markdown("#### ๐ Processing Status")
|
| 498 |
+
status_label = gr.Textbox(
|
| 499 |
+
label="",
|
| 500 |
+
value="โธ๏ธ Ready",
|
| 501 |
+
interactive=False,
|
| 502 |
+
show_label=False,
|
| 503 |
+
max_lines=1
|
| 504 |
+
)
|
| 505 |
+
status_detail = gr.Textbox(
|
| 506 |
+
label="",
|
| 507 |
+
value="Upload an audio file and click 'Start Transcription' to begin",
|
| 508 |
+
interactive=False,
|
| 509 |
+
show_label=False,
|
| 510 |
+
lines=2
|
| 511 |
+
)
|
| 512 |
+
|
| 513 |
+
gr.Markdown("#### ๐ Transcription Result")
|
| 514 |
+
transcript_ui = gr.Textbox(
|
| 515 |
+
label="",
|
| 516 |
+
lines=12,
|
| 517 |
+
placeholder="Transcription will appear here...",
|
| 518 |
+
show_label=False,
|
| 519 |
+
show_copy_button=True
|
| 520 |
+
)
|
| 521 |
+
|
| 522 |
+
gr.Markdown("#### ๐ก AI Summary")
|
| 523 |
+
summary_ui = gr.Textbox(
|
| 524 |
+
label="",
|
| 525 |
+
lines=6,
|
| 526 |
+
placeholder="AI-generated summary will appear here...",
|
| 527 |
+
show_label=False,
|
| 528 |
+
show_copy_button=True
|
| 529 |
+
)
|
| 530 |
+
gr.HTML('</div>')
|
| 531 |
+
|
| 532 |
+
# ็ถๅฎไบไปถ
|
| 533 |
+
btn_ui.click(
|
| 534 |
+
transcribe_ui,
|
| 535 |
+
inputs=[pw_ui, file_ui],
|
| 536 |
+
outputs=[status_label, status_detail, transcript_ui, summary_ui]
|
| 537 |
+
)
|
| 538 |
|
| 539 |
+
# ====== Tab 2: API Documentation ======
|
| 540 |
+
with gr.Tab("๐ฑ API Documentation", id="api"):
|
| 541 |
+
gr.Markdown("""
|
| 542 |
+
## ๐ API Overview
|
| 543 |
+
|
| 544 |
+
This service provides a **synchronous REST API** for audio transcription, perfect for integration with iPhone Shortcuts, mobile apps, or any HTTP client.
|
| 545 |
+
""")
|
| 546 |
+
|
| 547 |
+
gr.HTML("""
|
| 548 |
+
<div class="feature-list">
|
| 549 |
+
<div class="feature-item">
|
| 550 |
+
<div class="feature-icon">โก</div>
|
| 551 |
+
<h3>Fully Synchronous</h3>
|
| 552 |
+
<p>Returns complete results in a single request - no polling required</p>
|
| 553 |
+
</div>
|
| 554 |
+
<div class="feature-item">
|
| 555 |
+
<div class="feature-icon">๐</div>
|
| 556 |
+
<h3>Auto-Processing</h3>
|
| 557 |
+
<p>Handles files of any length automatically with intelligent chunking</p>
|
| 558 |
+
</div>
|
| 559 |
+
<div class="feature-item">
|
| 560 |
+
<div class="feature-icon">๐ก๏ธ</div>
|
| 561 |
+
<h3>Reliable & Stable</h3>
|
| 562 |
+
<p>Waits for complete processing before returning results</p>
|
| 563 |
+
</div>
|
| 564 |
+
<div class="feature-item">
|
| 565 |
+
<div class="feature-icon">๐</div>
|
| 566 |
+
<h3>Universal Access</h3>
|
| 567 |
+
<p>Works with any HTTP client or programming language</p>
|
| 568 |
+
</div>
|
| 569 |
+
</div>
|
| 570 |
+
""")
|
| 571 |
+
|
| 572 |
+
gr.Markdown("""
|
| 573 |
+
---
|
| 574 |
+
|
| 575 |
+
## ๐ก API Endpoint
|
| 576 |
+
|
| 577 |
+
**URL:** `/api/transcribe`
|
| 578 |
+
**Method:** `POST`
|
| 579 |
+
**Content-Type:** `application/json`
|
| 580 |
+
|
| 581 |
+
### Request Format
|
| 582 |
+
|
| 583 |
+
```json
|
| 584 |
+
{
|
| 585 |
+
"password": "your_password_here",
|
| 586 |
+
"file_data": "data:audio/m4a;base64,UklGRiQAAABXQVZFZm10...",
|
| 587 |
+
"file_name": "recording.m4a"
|
| 588 |
+
}
|
| 589 |
+
```
|
| 590 |
+
|
| 591 |
+
**Parameters:**
|
| 592 |
+
- `password` (string, required): Authentication password
|
| 593 |
+
- `file_data` (string, required): Base64-encoded audio file in data URL format
|
| 594 |
+
- `file_name` (string, optional): Original filename (default: "recording.m4a")
|
| 595 |
+
|
| 596 |
+
### Response Format
|
| 597 |
+
|
| 598 |
+
**Success Response (200 OK):**
|
| 599 |
+
```json
|
| 600 |
+
{
|
| 601 |
+
"status": "success",
|
| 602 |
+
"transcription": "ๅฎๆด็่ช้ณ่ฝๆๅญๅ
งๅฎน...",
|
| 603 |
+
"summary": "AI ็ๆ็ๅ
งๅฎนๆ่ฆ..."
|
| 604 |
+
}
|
| 605 |
+
```
|
| 606 |
+
|
| 607 |
+
**Error Response (401/400/500):**
|
| 608 |
+
```json
|
| 609 |
+
{
|
| 610 |
+
"status": "error",
|
| 611 |
+
"error": "Error message description"
|
| 612 |
+
}
|
| 613 |
+
```
|
| 614 |
+
|
| 615 |
+
---
|
| 616 |
+
|
| 617 |
+
## ๐ฑ iPhone Shortcuts Setup Guide
|
| 618 |
+
|
| 619 |
+
### Step-by-Step Configuration:
|
| 620 |
+
|
| 621 |
+
1. **Get File** โ Select your audio recording
|
| 622 |
+
|
| 623 |
+
2. **Base64 Encode** โ Encode the file content
|
| 624 |
+
|
| 625 |
+
3. **Text** โ Create data URL format:
|
| 626 |
+
```
|
| 627 |
+
data:audio/m4a;base64,[Base64 Encode Result]
|
| 628 |
+
```
|
| 629 |
+
|
| 630 |
+
4. **Dictionary** โ Build request body:
|
| 631 |
+
- Key: `password`, Value: `chou`
|
| 632 |
+
- Key: `file_data`, Value: [Text from step 3]
|
| 633 |
+
- Key: `file_name`, Value: `recording.m4a`
|
| 634 |
+
|
| 635 |
+
5. **Get Contents of URL**:
|
| 636 |
+
- URL: `https://your-domain.com/api/transcribe`
|
| 637 |
+
- Method: `POST`
|
| 638 |
+
- Headers:
|
| 639 |
+
- `Content-Type`: `application/json`
|
| 640 |
+
- Request Body: [Dictionary from step 4]
|
| 641 |
+
- Request Body Type: `JSON`
|
| 642 |
+
|
| 643 |
+
6. **Get Dictionary Value**:
|
| 644 |
+
- Key: `transcription` โ Get transcription result
|
| 645 |
+
- Key: `summary` โ Get AI summary
|
| 646 |
+
|
| 647 |
+
7. **Show Result** or **Copy to Clipboard**
|
| 648 |
+
|
| 649 |
+
---
|
| 650 |
+
|
| 651 |
+
## ๐งช Testing the API
|
| 652 |
+
|
| 653 |
+
### Using cURL:
|
| 654 |
+
|
| 655 |
+
```bash
|
| 656 |
+
curl -X POST https://your-domain.com/api/transcribe \\
|
| 657 |
+
-H "Content-Type: application/json" \\
|
| 658 |
+
-d '{
|
| 659 |
+
"password": "chou",
|
| 660 |
+
"file_data": "data:audio/m4a;base64,AAAA...",
|
| 661 |
+
"file_name": "test.m4a"
|
| 662 |
+
}'
|
| 663 |
+
```
|
| 664 |
+
|
| 665 |
+
### Using Python:
|
| 666 |
+
|
| 667 |
+
```python
|
| 668 |
+
import requests
|
| 669 |
+
import base64
|
| 670 |
+
|
| 671 |
+
# Read and encode audio file
|
| 672 |
+
with open("audio.m4a", "rb") as f:
|
| 673 |
+
audio_b64 = base64.b64encode(f.read()).decode()
|
| 674 |
+
|
| 675 |
+
# Prepare request
|
| 676 |
+
url = "https://your-domain.com/api/transcribe"
|
| 677 |
+
payload = {
|
| 678 |
+
"password": "chou",
|
| 679 |
+
"file_data": f"data:audio/m4a;base64,{audio_b64}",
|
| 680 |
+
"file_name": "audio.m4a"
|
| 681 |
+
}
|
| 682 |
+
|
| 683 |
+
# Send request
|
| 684 |
+
response = requests.post(url, json=payload)
|
| 685 |
+
result = response.json()
|
| 686 |
+
|
| 687 |
+
if result["status"] == "success":
|
| 688 |
+
print("Transcription:", result["transcription"])
|
| 689 |
+
print("Summary:", result["summary"])
|
| 690 |
+
else:
|
| 691 |
+
print("Error:", result["error"])
|
| 692 |
+
```
|
| 693 |
+
|
| 694 |
+
### Using JavaScript (Node.js):
|
| 695 |
+
|
| 696 |
+
```javascript
|
| 697 |
+
const fs = require('fs');
|
| 698 |
+
const axios = require('axios');
|
| 699 |
+
|
| 700 |
+
// Read and encode audio file
|
| 701 |
+
const audioBuffer = fs.readFileSync('audio.m4a');
|
| 702 |
+
const audioB64 = audioBuffer.toString('base64');
|
| 703 |
+
|
| 704 |
+
// Send request
|
| 705 |
+
axios.post('https://your-domain.com/api/transcribe', {
|
| 706 |
+
password: 'chou',
|
| 707 |
+
file_data: `data:audio/m4a;base64,${audioB64}`,
|
| 708 |
+
file_name: 'audio.m4a'
|
| 709 |
+
})
|
| 710 |
+
.then(response => {
|
| 711 |
+
const { transcription, summary } = response.data;
|
| 712 |
+
console.log('Transcription:', transcription);
|
| 713 |
+
console.log('Summary:', summary);
|
| 714 |
+
})
|
| 715 |
+
.catch(error => {
|
| 716 |
+
console.error('Error:', error.response.data);
|
| 717 |
+
});
|
| 718 |
+
```
|
| 719 |
+
|
| 720 |
+
---
|
| 721 |
+
|
| 722 |
+
## โ๏ธ Technical Specifications
|
| 723 |
+
|
| 724 |
+
| Feature | Details |
|
| 725 |
+
|---------|---------|
|
| 726 |
+
| **Max File Size** | 25 MB per chunk (automatically splits larger files) |
|
| 727 |
+
| **Supported Formats** | MP3, M4A, MP4, WAV, OGG, WEBM, AAC, OPUS |
|
| 728 |
+
| **Processing Model** | OpenAI Whisper (high accuracy) |
|
| 729 |
+
| **Summary Model** | GPT-4o-mini (intelligent summarization) |
|
| 730 |
+
| **Language Support** | Traditional Chinese (Taiwan) output |
|
| 731 |
+
| **Response Time** | Varies by file length (typically 5-30 seconds) |
|
| 732 |
+
| **Authentication** | Password-based security |
|
| 733 |
+
|
| 734 |
+
---
|
| 735 |
+
|
| 736 |
+
## ๐ก Important Notes
|
| 737 |
+
|
| 738 |
+
- โ
**Fully synchronous:** The API waits for complete processing before responding
|
| 739 |
+
- โ
**No polling needed:** Single request returns final results
|
| 740 |
+
- โ
**Auto-chunking:** Large files are automatically split and processed
|
| 741 |
+
- โ
**Reliable:** Connection remains open until processing completes
|
| 742 |
+
- โ ๏ธ **Timeout considerations:** Ensure your HTTP client has sufficient timeout settings (recommended: 300 seconds)
|
| 743 |
+
- ๐ **Security:** Always use HTTPS in production environments
|
| 744 |
+
|
| 745 |
+
---
|
| 746 |
+
|
| 747 |
+
## ๐ Troubleshooting
|
| 748 |
+
|
| 749 |
+
**Problem:** 401 Unauthorized
|
| 750 |
+
**Solution:** Check that your password is correct
|
| 751 |
+
|
| 752 |
+
**Problem:** 400 Bad Request
|
| 753 |
+
**Solution:** Verify that `file_data` is in correct data URL format
|
| 754 |
+
|
| 755 |
+
**Problem:** 500 Internal Server Error
|
| 756 |
+
**Solution:** Check server logs for details; ensure audio file is valid
|
| 757 |
+
|
| 758 |
+
**Problem:** Request timeout
|
| 759 |
+
**Solution:** Increase HTTP client timeout setting or split audio into smaller files
|
| 760 |
+
|
| 761 |
+
**Problem:** Base64 encoding issues
|
| 762 |
+
**Solution:** Ensure proper encoding and data URL format: `data:audio/m4a;base64,[encoded_data]`
|
| 763 |
+
|
| 764 |
+
---
|
| 765 |
+
|
| 766 |
+
## ๐ Example Response Times
|
| 767 |
+
|
| 768 |
+
| File Duration | Approximate Processing Time |
|
| 769 |
+
|---------------|---------------------------|
|
| 770 |
+
| 0-30 seconds | 5-10 seconds |
|
| 771 |
+
| 30-60 seconds | 10-15 seconds |
|
| 772 |
+
| 1-3 minutes | 15-30 seconds |
|
| 773 |
+
| 3-5 minutes | 30-60 seconds |
|
| 774 |
+
| 5-10 minutes | 1-2 minutes |
|
| 775 |
+
|
| 776 |
+
*Note: Processing time includes transcription, language conversion, and AI summarization.*
|
| 777 |
+
|
| 778 |
+
---
|
| 779 |
+
|
| 780 |
+
## ๐ Integration Examples
|
| 781 |
+
|
| 782 |
+
### Zapier Integration
|
| 783 |
+
1. Trigger: New file in storage
|
| 784 |
+
2. Action: Webhooks by Zapier (POST request)
|
| 785 |
+
3. Configure endpoint with password and base64 encoded file
|
| 786 |
+
|
| 787 |
+
### Make.com Integration
|
| 788 |
+
1. Add HTTP module
|
| 789 |
+
2. Configure POST request with JSON payload
|
| 790 |
+
3. Parse response and route to desired action
|
| 791 |
+
|
| 792 |
+
### iOS Shortcuts Tips
|
| 793 |
+
- Use "Get Contents of URL" action
|
| 794 |
+
- Set request timeout to at least 120 seconds
|
| 795 |
+
- Add error handling for network issues
|
| 796 |
+
- Consider showing progress notification
|
| 797 |
+
|
| 798 |
+
---
|
| 799 |
+
|
| 800 |
+
## ๐ Support & Resources
|
| 801 |
+
|
| 802 |
+
For additional help or feature requests, please contact your service administrator.
|
| 803 |
+
|
| 804 |
+
**Useful Links:**
|
| 805 |
+
- OpenAI Whisper Documentation
|
| 806 |
+
- Base64 Encoding Tools
|
| 807 |
+
- iPhone Shortcuts Gallery
|