Spaces:

thienphuc12339
/

LipNet

Runtime error

App Files Files Community

LipNet / lipnet /ui.py

thienphuc12339

Upload 10 files

6d5d850 verified 5 months ago

raw

history blame contribute delete

4.57 kB

	import logging
	import time
	from pathlib import Path
	from typing import Any, Optional

	import gradio as gr

	from . import config
	from .model import LipReadingModel, predict_from_video
	from .preprocessing import VideoPreprocessor

	# Module-level logger, named after this module's import path via __name__.
	logger = logging.getLogger(__name__)


	def _resolve_video_path(video_input: Any) -> Optional[Path]:
	"""
	Gradio's Video component can return a filepath or a dict depending on version.
	Normalize to a Path if possible.
	"""
	if not video_input:
	return None

	if isinstance(video_input, (str, Path)):
	path = Path(video_input)
	return path if path.is_file() else None

	if isinstance(video_input, dict):
	for key in ("name", "path", "data"):
	value = video_input.get(key)
	if value:
	candidate = Path(value)
	if candidate.is_file():
	return candidate
	return None


	def run_prediction(
	    video_input: Any,
	    model: LipReadingModel,
	    preprocessor: VideoPreprocessor,
	) -> str:
	    """Run lip-reading inference on a Gradio video input and return display text.

	    The input is resolved to a file on disk, checked against
	    ``config.MAX_VIDEO_SIZE_MB``, and then passed to ``predict_from_video``.
	    Failures are never raised to the caller; they are logged and reported as
	    a human-readable message instead.

	    Args:
	        video_input: Raw value produced by the Gradio video component.
	        model: Model instance forwarded to ``predict_from_video``.
	        preprocessor: Preprocessor instance forwarded to ``predict_from_video``.

	    Returns:
	        The prediction converted to ``str``, or a message describing why no
	        prediction could be made.
	    """
	    video_path = _resolve_video_path(video_input)
	    if not video_path:
	        return "No video provided. Please upload or record a video."

	    # Only the stat() call can fail here, so keep the try body to that line.
	    try:
	        video_size_mb = video_path.stat().st_size / (1024 * 1024)
	    except OSError as exc:
	        # logger.exception keeps the traceback, which logger.error dropped.
	        logger.exception("Error accessing video file: %s", exc)
	        return f"Error accessing video file: {exc}"
	    logger.info("Uploaded video size: %.2f MB", video_size_mb)

	    if video_size_mb > config.MAX_VIDEO_SIZE_MB:
	        return f"Video size exceeds {config.MAX_VIDEO_SIZE_MB} MB limit. Please upload a smaller video."

	    try:
	        logger.info("Running prediction for %s", video_path)
	        # perf_counter is monotonic, so the measured duration cannot be
	        # skewed by system clock adjustments the way time.time() can.
	        start_time = time.perf_counter()
	        prediction = predict_from_video(
	            video_path=str(video_path),
	            model=model,
	            preprocessor=preprocessor,
	        )
	        total_time = time.perf_counter() - start_time
	        logger.info("Prediction completed in %.2f seconds.", total_time)
	        logger.info("Prediction result: %s", prediction)
	    except Exception as exc:
	        # UI boundary: report the failure in the output box instead of
	        # raising, but keep the full traceback in the logs.
	        logger.exception("Prediction error: %s", exc)
	        prediction = f"An error occurred during prediction: {exc}"

	    return prediction if isinstance(prediction, str) else str(prediction)


	def create_app(model: LipReadingModel, preprocessor: VideoPreprocessor) -> gr.Blocks:
	    """Assemble the Gradio Blocks interface for the lip-reading demo.

	    The interface has an upload tab and a webcam tab. Each tab has its own
	    video component, button and read-only output box, and both buttons call
	    ``run_prediction`` with the given ``model`` and ``preprocessor``.

	    Args:
	        model: Model instance passed through to ``run_prediction``.
	        preprocessor: Preprocessor instance passed through to ``run_prediction``.

	    Returns:
	        The ``gr.Blocks`` app with its request queue enabled (``max_size=4``).
	    """
	    # Both output boxes are configured identically.
	    result_box_options = {
	        "label": "Predicted text",
	        "interactive": False,
	        "lines": 4,
	        "placeholder": "Prediction will appear here.",
	    }

	    # NOTE(review): the nesting below was reconstructed from a listing with
	    # flattened indentation; the help accordion, event wiring and footer are
	    # placed at the Blocks level, outside the tabs -- confirm the layout.
	    with gr.Blocks(title="Lip Reading App") as demo:
	        gr.Markdown("# Lip Reading App")
	        gr.Markdown(
	            "Upload a short video or record with your webcam to generate a lip-reading transcription."
	        )

	        with gr.Tab("Upload Video"):
	            uploaded_clip = gr.Video(label="Upload your video", sources=["upload"], format="mp4")
	            upload_trigger = gr.Button("Run prediction")
	            upload_result = gr.Textbox(**result_box_options)

	        with gr.Tab("Record Video"):
	            recorded_clip = gr.Video(label="Record with webcam", sources=["webcam"], format="mp4")
	            record_trigger = gr.Button("Run prediction on recording")
	            record_result = gr.Textbox(**result_box_options)

	        with gr.Accordion("How to use", open=False):
	            gr.Markdown(
	                """
	Upload video
	- Select an MP4/AVI/MOV/MPG video that clearly shows the speaker's lips.
	- Click "Run prediction" to get the transcription.

	Record video
	- Allow webcam access if prompted.
	- Record, wait for the preview to appear, then click "Run prediction on recording".
	"""
	            )

	        # Each button feeds its own tab's video into run_prediction and
	        # writes the returned text into that tab's output box.
	        upload_trigger.click(
	            fn=lambda video: run_prediction(video, model, preprocessor),
	            inputs=uploaded_clip,
	            outputs=upload_result,
	        )
	        record_trigger.click(
	            fn=lambda video: run_prediction(video, model, preprocessor),
	            inputs=recorded_clip,
	            outputs=record_result,
	        )

	        gr.Markdown("---\n(c) 2024 Lip Reading App.")

	    demo.queue(max_size=4)
	    return demo