Spaces:

TheBug95
/

OphtalmoCapture

Sleeping

OphtalmoCapture / interface /components /recorder.py

Solución de problemas con los botones de volver a grabar y restaurar original. Solución de incongruencias en los diálogos de descargas e implementación de internacionalización de la herramienta

1f7c87f 9 days ago

raw

history blame contribute delete

7.82 kB

	"""OphthalmoCapture — Audio Recorder & Transcription Component

	Records audio via st.audio_input, transcribes with Whisper, stores the
	audio bytes and transcription in the ephemeral session, and lets the
	doctor edit the transcription or restore the original.

	Includes timestamped segments from Whisper for reference.
	"""

	import hashlib
	import streamlit as st
	import database as db
	from i18n import t
	from services import session_manager as sm
	from services.whisper_service import transcribe_audio_with_timestamps, format_timestamp


	def _audio_fingerprint(audio_bytes: bytes) -> str:
	"""Return a short hash of the audio content for change detection."""
	return hashlib.md5(audio_bytes).hexdigest()


	def render_recorder(image_id: str, model, language: str):
	    """Render the audio recording + transcription panel.

	    Looks up the image entry in ``st.session_state.images`` and returns
	    without rendering anything when it is missing. Otherwise the panel:

	    1. applies any pending "re-record" / "restore original" action flagged
	       by a button press on the previous run,
	    2. shows the audio input and transcribes a recording whose content hash
	       differs from the last processed one, appending the text to both
	       ``img["transcription"]`` and ``img["transcription_original"]``,
	    3. shows the editable transcription and syncs edits back to the image,
	    4. lists the timestamped segments, the helper buttons and a status line.

	    Parameters
	    ----------
	    image_id : str
	        UUID of the currently selected image.
	    model :
	        Loaded Whisper model instance.
	    language : str
	        ISO language code for transcription (e.g. "es").
	    """
	    img = st.session_state.images.get(image_id)
	    if img is None:
	        return

	    st.subheader(t("dictation"))

	    # Per-image session_state keys, computed once and reused below.
	    processed_key = f"_last_audio_{image_id}"  # hash of the last transcribed audio
	    segments_key = f"_segments_{image_id}"
	    audio_key = f"audio_input_{image_id}"
	    area_key = f"transcription_area_{image_id}"
	    rerecord_flag = f"_flag_rerecord_{image_id}"
	    restore_flag = f"_flag_restore_{image_id}"

	    # ── Handle button actions BEFORE widget rendering ────────────────────
	    # Streamlit forbids setting a widget's session_state key after the widget
	    # is instantiated, so the buttons only raise a flag and rerun; the state
	    # changes are applied here, before the widgets are created.
	    if st.session_state.pop(rerecord_flag, False):
	        img["audio_bytes"] = None
	        img["transcription"] = ""
	        img["transcription_original"] = ""
	        st.session_state.pop(segments_key, None)
	        st.session_state.pop(processed_key, None)
	        st.session_state.pop(audio_key, None)
	        # Set value BEFORE the text_area is created
	        st.session_state[area_key] = ""
	        sm.update_activity()

	    if st.session_state.pop(restore_flag, False):
	        img["transcription"] = img["transcription_original"]
	        # Set value BEFORE the text_area is created
	        st.session_state[area_key] = img["transcription_original"]
	        sm.update_activity()

	    # ── Audio recording ──────────────────────────────────────────────────
	    audio_wav = st.audio_input(t("record_audio"), key=audio_key)

	    if audio_wav is not None:
	        audio_bytes = audio_wav.getvalue()
	        fingerprint = _audio_fingerprint(audio_bytes)

	        # Only transcribe if this is a new recording (content changed)
	        if st.session_state.get(processed_key) != fingerprint:
	            with st.spinner(t("transcribing")):
	                text, new_segments = transcribe_audio_with_timestamps(
	                    model, audio_bytes, language
	                )

	            img["audio_bytes"] = audio_bytes

	            # Append (don't overwrite) to the editable text and to the copy
	            # of the raw Whisper output.
	            for field in ("transcription", "transcription_original"):
	                img[field] = img[field] + " " + text if img[field] else text

	            # Accumulate timestamped segments across recordings
	            st.session_state[segments_key] = (
	                st.session_state.get(segments_key, []) + new_segments
	            )

	            # Mark this audio as processed using content hash (stable across reruns)
	            st.session_state[processed_key] = fingerprint
	            # Update the text_area widget state so it reflects the new text
	            st.session_state[area_key] = img["transcription"]

	            # Re-save to audit DB if the image is already labeled (upsert)
	            if img.get("label"):
	                _save_annotation_best_effort(img)

	            sm.update_activity()
	            st.rerun()

	    # ── Editable transcription ───────────────────────────────────────────
	    # NOTE(review): the widget receives both `value=` and (after the handlers
	    # above) a session_state entry under the same key; confirm this does not
	    # trigger Streamlit's duplicate-default warning in the deployed version.
	    edited_text = st.text_area(
	        t("transcription_editable"),
	        value=img["transcription"],
	        height=180,
	        key=area_key,
	        placeholder=t("transcription_placeholder"),
	    )

	    # Sync edits back to session
	    if edited_text != img["transcription"]:
	        img["transcription"] = edited_text
	        sm.update_activity()

	    # ── Timestamped segments (Idea C) ────────────────────────────────────
	    segments = st.session_state.get(segments_key, [])
	    if segments:
	        with st.expander(t("segments_timestamps"), expanded=False):
	            for seg in segments:
	                ts_start = format_timestamp(seg["start"])
	                ts_end = format_timestamp(seg["end"])
	                st.markdown(
	                    f"`{ts_start} → {ts_end}`   {seg['text']}"
	                )

	    # ── Helper buttons ───────────────────────────────────────────────────
	    rerecord_col, restore_col = st.columns(2)

	    with rerecord_col:
	        # Re-record: set flag → rerun → flag handler above clears state
	        has_audio = img["audio_bytes"] is not None
	        if st.button(
	            t("re_record"),
	            key=f"rerecord_{image_id}",
	            disabled=not has_audio,
	            use_container_width=True,
	        ):
	            st.session_state[rerecord_flag] = True
	            st.rerun()

	    with restore_col:
	        # Restore original: set flag → rerun → flag handler above restores
	        has_original = bool(img["transcription_original"])
	        is_different = img["transcription"] != img["transcription_original"]
	        if st.button(
	            t("restore_original"),
	            key=f"restore_{image_id}",
	            disabled=not (has_original and is_different),
	            use_container_width=True,
	        ):
	            st.session_state[restore_flag] = True
	            st.rerun()

	    # ── Status line ──────────────────────────────────────────────────────
	    if img["transcription"]:
	        modified_tag = ""
	        if (
	            img["transcription_original"]
	            and img["transcription"] != img["transcription_original"]
	        ):
	            modified_tag = t("manually_modified")
	        word_count = len(img["transcription"].split())
	        st.caption(f"{t('word_count', count=word_count)}{modified_tag}")
	    else:
	        st.caption(t("no_transcription_yet"))


	def _save_annotation_best_effort(img) -> None:
	    """Upsert the image's annotation in the audit DB without interrupting the UI.

	    Any failure is logged with its traceback instead of being silently
	    discarded, so a broken audit trail can be noticed; the exception is not
	    re-raised because the save is a best-effort side effect of transcription.
	    """
	    import logging

	    try:
	        db.save_or_update_annotation(
	            image_filename=img["filename"],
	            label=img["label"],
	            transcription=img["transcription"],
	            doctor_name=st.session_state.get("doctor_name", ""),
	            session_id=st.session_state.get("session_id", ""),
	            locs_data=img.get("locs_data", {}),
	        )
	    except Exception:
	        logging.getLogger(__name__).exception(
	            "Could not save the updated transcription to the audit DB"
	        )