Update app.py
app.py CHANGED
@@ -32,19 +32,20 @@ from openai import OpenAI
 import traceback
 from TTS.api import TTS
 import torch
-from pydub import AudioSegment
 from pyannote.audio import Pipeline
 import wave
 import librosa
 import noisereduce as nr
-import soundfile as sf
 from paddleocr import PaddleOCR
 import cv2
 from rapidfuzz import fuzz
 from tqdm import tqdm
 import threading
 import requests
-
+import webrtcvad
+from pydub import AudioSegment
+from pydub.silence import split_on_silence
+import soundfile as sf
 
 logger = logging.getLogger(__name__)
 
@@ -64,6 +65,8 @@ client = OpenAI(
 )
 hf_api_key = os.environ.get("hf_token")
 ELEVENLABS_API_KEY = os.environ.get("elevenlabs_token")
+# Correct API endpoint for ElevenLabs Scribe
+ELEVENLABS_SCRIBE_API_URL = "https://api.elevenlabs.io/v1/speech-to-text"
 
 def silence(duration, fps=44100):
     """
@@ -276,139 +279,201 @@ def transcribe_video_with_speakers(video_path):
 
     return transcript_with_speakers, detected_language
 
-
-def transcribe_video_with_speakers_11labs(video_path, num_speakers=None):
+def segment_audio_from_video(video_path, frame_duration_ms=30):
     """
-    Transcribes a video's audio with speaker diarization using the ElevenLabs Scribe API.
+    Extracts audio from a video and segments it into speech chunks using WebRTC VAD.
+    Returns a list of dictionaries, each with 'start' and 'end' timestamps for speech segments.
 
     Args:
-        video_path (str): The path to the video file.
-        num_speakers (int, optional): The expected number of speakers
-            for diarization. Defaults to None.
+        video_path (str): The path to the input video file.
+        frame_duration_ms (int): The duration of a frame in milliseconds for VAD (10, 20, or 30).
+            Lower values are more precise but computationally intensive.
 
     Returns:
         tuple: A tuple containing:
-            - transcript_with_speakers (list): A list of transcribed segments
-              with speaker labels.
-            - detected_language (str): The language detected by the API.
-            - error_message (str, optional): An error message if an error occurred.
+            - audio_path (str): Path to the extracted temporary audio file.
+            - speech_segments (list): A list of dictionaries, where each dictionary
+              represents a speech segment with 'start' and 'end' timestamps in seconds.
+            - error_message (str, optional): An error message if processing fails.
     """
-
-
-    # Correct API endpoint as per documentation
-    ELEVENLABS_SCRIBE_API_URL = "https://api.elevenlabs.io/v1/speech-to-text"
-
-    transcript_with_speakers = []
-    detected_language = None
+    audio_path = "temp_extracted_audio.wav"
+    speech_segments = []
     error_message = None
-    audio_path = "temp_audio_for_scribe.wav"
 
     try:
         # 1. Extract audio from video
         logger.info(f"Extracting audio from video: {video_path}")
         video = VideoFileClip(video_path)
-        # Extract the audio track as a 16-bit PCM WAV file
-        # (a format the Scribe API accepts directly)
-        video.audio.write_audiofile(audio_path, codec='pcm_s16le')
-        video.close()
+        # Ensure audio is saved in a compatible format for WebRTC VAD (16-bit, 1 channel, 8000/16000/32000 Hz)
+        # We will resample to 16kHz for VAD as it's a good balance.
+        video.audio.write_audiofile(audio_path, codec='pcm_s16le', fps=16000, nbytes=2, ffmpeg_params=["-ac", "1"])
+        video.close()
         logger.info(f"Audio extracted to: {audio_path}")
 
-        # 2. Send the extracted audio to the ElevenLabs Scribe API
+        # 2. Load audio for VAD
+        audio = AudioSegment.from_wav(audio_path)
+        sample_rate = audio.frame_rate
+        audio_data = np.array(audio.get_array_of_samples())
+
+        # WebRTC VAD operates on 16-bit mono audio at 8kHz, 16kHz, or 32kHz.
+        # We already saved at 16kHz, so we can proceed.
+        if sample_rate not in [8000, 16000, 32000]:
+            error_message = f"Unsupported sample rate for VAD: {sample_rate} Hz. Must be 8kHz, 16kHz, or 32kHz."
+            logger.error(error_message)
+            return audio_path, [], error_message
+
+        vad = webrtcvad.Vad(3) # Aggressiveness mode (0-3, 3 is most aggressive)
+        frames = []
+        offset = 0
+        while offset + frame_duration_ms <= len(audio):
+            frame_start = offset
+            frame_end = offset + frame_duration_ms
+            frame = audio[frame_start:frame_end]
+            frames.append(frame)
+            offset += frame_duration_ms
+
+        logger.info(f"Running WebRTC VAD on {len(frames)} frames...")
+
+        current_segment_start = None
+        for i, frame in enumerate(frames):
+            is_speech = vad.is_speech(frame.raw_data, sample_rate)
+
+            frame_start_time = (i * frame_duration_ms) / 1000.0
+            frame_end_time = ((i + 1) * frame_duration_ms) / 1000.0
+
+            if is_speech:
+                if current_segment_start is None:
+                    current_segment_start = frame_start_time
+            else:
+                if current_segment_start is not None:
+                    speech_segments.append({"start": current_segment_start, "end": frame_end_time})
+                    current_segment_start = None
+
+        # Add the last segment if it ended with speech
+        if current_segment_start is not None:
+            speech_segments.append({"start": current_segment_start, "end": len(audio) / 1000.0})
+
+        logger.info(f"VAD completed. Found {len(speech_segments)} speech segments.")
+
+    except Exception as e:
+        error_message = f"An error occurred during audio segmentation: {e}"
+        logger.error(error_message)
+
+    return audio_path, speech_segments, error_message
+
+def transcribe_segments_with_scribe(full_audio_path, segments):
+    """
+    Transcribes pre-defined audio segments using the ElevenLabs Scribe API.
+    Diarization is explicitly turned off as per requirements.
+
+    Args:
+        full_audio_path (str): The path to the full extracted audio file.
+        segments (list): A list of dictionaries, where each dictionary
+            represents a segment with 'start' and 'end' timestamps in seconds.
+
+    Returns:
+        tuple: A tuple containing:
+            - transcribed_segments (list): A list of dictionaries, where each dictionary
+              represents a transcribed segment with 'start', 'end', and 'text'.
+            - detected_language (str): The language detected by the API (e.g., "en", "es").
+            - error_message (str, optional): An error message if transcription fails.
+    """
+    transcribed_segments = []
+    detected_language = "unknown" # Default
+    error_message = None
+
+    if not os.path.exists(full_audio_path):
+        return [], detected_language, f"Full audio file not found at {full_audio_path}"
+
+    try:
+        audio_clip = AudioFileClip(full_audio_path)
+
         headers = {
             "xi-api-key": ELEVENLABS_API_KEY,
         }
-
-        # Parameters sent as multipart form data
         data = {
-            "model_id": "scribe_v1",
+            "model_id": "scribe_v1",
         }
-        # Enable speaker diarization via query parameters
+        # Explicitly set diarize to false, as it's not needed.
         params = {
-            "diarize": "true",
+            "diarize": "false",
         }
-        if num_speakers is not None:
-            params["num_speakers"] = str(num_speakers) # Convert to string for API
 
-        files = {
-            "file": (os.path.basename(audio_path), open(audio_path, "rb"), "audio/wav") # Key changed to 'file'
-        }
+        logger.info(f"Starting transcription of {len(segments)} segments with ElevenLabs Scribe...")
 
-        # Send the request to the Scribe endpoint
-        logger.info("Sending audio to ElevenLabs Scribe for transcription...")
-        response = requests.post(ELEVENLABS_SCRIBE_API_URL, headers=headers, data=data, params=params, files=files)
-        response.raise_for_status()
-        scribe_result = response.json()
-        logger.info("Transcription response received from ElevenLabs Scribe.")
-        # logger.debug(f"ElevenLabs Scribe API Response: {json.dumps(scribe_result, indent=2)}")
-
-        # 3. Parse the API response to match the desired output format
-        # The API returns a 'words' list, we need to group them into segments
-        if "words" in scribe_result and scribe_result["words"]:
-            current_segment = None
-            for word_data in scribe_result["words"]:
-                # Only process actual words, skip spacing or other types if necessary
-                if word_data.get("type") != "word":
-                    continue
-
-                word_text = word_data.get("text", "").strip()
-                word_start = float(word_data.get("start", 0))
-                word_end = float(word_data.get("end", 0))
-                speaker_id = word_data.get("speaker_id", "SPEAKER_UNKNOWN")
-
-                # If starting a new segment or speaker changed or significant gap
-                if (current_segment is None or
-                    speaker_id != current_segment["speaker"] or
-                    word_start - current_segment["end"] > 0.5): # Adjust gap threshold as needed
-
-                    if current_segment is not None:
-                        transcript_with_speakers.append(current_segment)
-
-                    current_segment = {
-                        "start": word_start,
-                        "end": word_end,
-                        "text": word_text,
-                        "speaker": speaker_id
-                    }
-                else:
-                    # Continue current segment
-                    current_segment["text"] += " " + word_text
-                    current_segment["end"] = word_end
+        for i, segment in enumerate(segments):
+            segment_start = segment["start"]
+            segment_end = segment["end"]
 
-            # Append the final segment after the loop
-            if current_segment is not None:
-                transcript_with_speakers.append(current_segment)
-
-            logger.info(f"Successfully parsed {len(transcript_with_speakers)} segments from words.")
-        else:
-            logger.warning("No 'words' found in ElevenLabs Scribe API response or response is empty.")
-            error_message = "ElevenLabs Scribe API response did not contain words for transcription."
+            # Ensure segment duration is positive
+            if segment_end <= segment_start:
+                logger.warning(f"Skipping segment {i} due to invalid duration: {segment_start:.2f}s -> {segment_end:.2f}s")
+                continue
 
-        # Extract the detected language from the response
-        if "language_code" in scribe_result:
-            detected_language = scribe_result["language_code"]
+            temp_segment_audio_path = f"temp_segment_{i}.wav"
+            try:
+                # Subclip the audio and save it temporarily
+                sub_clip = audio_clip.subclip(segment_start, segment_end)
+                # Save as 16-bit PCM WAV for Scribe API compatibility
+                sub_clip.write_audiofile(temp_segment_audio_path, codec='pcm_s16le')
+
+                logger.info(f"Transcribing segment {i+1}/{len(segments)}: {segment_start:.2f}s - {segment_end:.2f}s")
+
+                with open(temp_segment_audio_path, "rb") as audio_file:
+                    files = {
+                        "file": (os.path.basename(temp_segment_audio_path), audio_file, "audio/wav")
+                    }
+                    response = requests.post(ELEVENLABS_SCRIBE_API_URL, headers=headers, files=files, data=data, params=params)
+                    response.raise_for_status()
+                    scribe_result = response.json()
+
+                segment_text = ""
+                if "text" in scribe_result:
+                    segment_text = scribe_result["text"].strip()
+                elif "words" in scribe_result and scribe_result["words"]:
+                    # Fallback if 'text' field is not directly available, reconstruct from words
+                    segment_text = " ".join([w.get("text", "") for w in scribe_result["words"] if w.get("type") == "word"]).strip()
+
+                if segment_text:
+                    transcribed_segments.append({
+                        "start": segment_start,
+                        "end": segment_end,
+                        "text": segment_text
+                    })
+                else:
+                    logger.warning(f"No transcription text found for segment {i+1}.")
+
+                # Update detected language from the first successful transcription
+                if "language_code" in scribe_result and detected_language == "unknown":
+                    detected_language = scribe_result["language_code"]
+
+            except requests.exceptions.HTTPError as http_err:
+                error_message = f"HTTP error for segment {i+1}: {http_err} - {response.text}"
+                logger.error(error_message)
+                # Continue to next segment even if one fails
+            except requests.exceptions.RequestException as req_err:
+                error_message = f"Request error for segment {i+1}: {req_err}"
+                logger.error(error_message)
+                # Continue to next segment
+            except Exception as e:
+                error_message = f"Error processing segment {i+1}: {e}"
+                logger.error(error_message)
+                # Continue to next segment
+            finally:
+                if os.path.exists(temp_segment_audio_path):
+                    os.remove(temp_segment_audio_path)
+
+        logger.info("All segments processed by ElevenLabs Scribe.")
 
-    except requests.exceptions.HTTPError as http_err:
-        error_message = f"HTTP error occurred: {http_err} - {response.text}"
-        logger.error(error_message)
-    except requests.exceptions.ConnectionError as conn_err:
-        error_message = f"Connection error occurred: {conn_err}"
-        logger.error(error_message)
-    except requests.exceptions.Timeout as timeout_err:
-        error_message = f"Timeout error occurred: {timeout_err}"
-        logger.error(error_message)
-    except requests.exceptions.RequestException as req_err:
-        error_message = f"An unexpected request error occurred: {req_err}"
-        logger.error(error_message)
     except Exception as e:
-        error_message = f"An error occurred during transcription: {e}"
+        error_message = f"An error occurred during overall transcription process: {e}"
         logger.error(error_message)
     finally:
-        # Clean up the temporary audio file
-        if os.path.exists(audio_path):
-            os.remove(audio_path)
-            logger.info(f"Cleaned up temporary audio file: {audio_path}")
+        if 'audio_clip' in locals() and audio_clip is not None:
+            audio_clip.close()
+
+    return transcribed_segments, detected_language, error_message
 
-    return transcript_with_speakers, detected_language
 
 # Function to get the appropriate translation model based on target language
 def get_translation_model(source_language, target_language):
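A note on the VAD framing above: WebRTC VAD accepts only 10, 20, or 30 ms frames of 16-bit mono PCM at a small set of sample rates (the code above restricts itself to 8, 16, or 32 kHz), which is why the extracted audio is resampled to 16 kHz mono before slicing. A minimal standalone sketch of that contract, independent of app.py (it assumes a local mono 16 kHz speech.wav; the file name is illustrative and not part of this change):

import webrtcvad
from pydub import AudioSegment

audio = AudioSegment.from_wav("speech.wav")  # must be 16-bit mono at 8/16/32 kHz
vad = webrtcvad.Vad(3)  # aggressiveness 0-3; 3 rejects non-speech most aggressively

frame_ms = 30
# 30 ms at 16 kHz with 2 bytes per sample -> 960 bytes, the exact payload is_speech() expects
expected_bytes = int(audio.frame_rate * frame_ms / 1000) * audio.sample_width * audio.channels
assert len(audio[:frame_ms].raw_data) == expected_bytes

flags = [
    vad.is_speech(audio[t:t + frame_ms].raw_data, audio.frame_rate)
    for t in range(0, len(audio) - frame_ms, frame_ms)
]
print(f"{sum(flags)}/{len(flags)} frames flagged as speech")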
@@ -1308,7 +1373,8 @@ def upload_and_manage(file, target_language, process_mode):
 
     # Step 1: Transcribe audio from uploaded media file and get timestamps
     logger.info("Transcribing audio...")
-    transcription_json, source_language = transcribe_video_with_speakers_11labs(file.name)
+    audio_path, speech_segments, _ = segment_audio_from_video(file.name)
+    transcription_json, source_language, _ = transcribe_segments_with_scribe(audio_path, speech_segments)
     logger.info(f"Transcription completed. Detected source language: {source_language}")
 
     transcription_json_merged = transcription_json
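Taken together, the change replaces the old single-call diarizing transcriber with a two-stage pipeline: segment_audio_from_video finds speech regions with VAD, then transcribe_segments_with_scribe sends each region to Scribe with diarization disabled. A minimal sketch of the intended call sequence (the sample.mp4 path is an illustrative assumption, and the elevenlabs_token environment variable must be set):

audio_path, speech_segments, seg_err = segment_audio_from_video("sample.mp4")
if seg_err:
    logger.warning(f"Segmentation finished with an error: {seg_err}")

# Transcribe whatever segments were found; each entry carries start/end/text.
transcript, language, err = transcribe_segments_with_scribe(audio_path, speech_segments)
print(f"Detected language: {language}")
for seg in transcript:
    print(f"[{seg['start']:.2f}s - {seg['end']:.2f}s] {seg['text']}")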