Spaces:
Runtime error
Runtime error
| import logging | |
| import time | |
| from pathlib import Path | |
| from typing import Any, Optional | |
| import gradio as gr | |
| from . import config | |
| from .model import LipReadingModel, predict_from_video | |
| from .preprocessing import VideoPreprocessor | |
| logger = logging.getLogger(__name__) | |
def _resolve_video_path(video_input: Any) -> Optional[Path]:
    """
    Normalize a Gradio Video component value to a ``Path`` to an existing file.

    Gradio's Video component can return a filepath or a dict depending on
    version. For a dict, the ``"name"``, ``"path"`` and ``"data"`` keys are
    tried in that order and the first value that points at a regular file wins.

    Args:
        video_input: The raw value received from the component.

    Returns:
        The path of an existing regular file, or ``None`` when the input is
        empty, of an unsupported type, or does not reference such a file.
    """
    if not video_input:
        return None

    if isinstance(video_input, (str, Path)):
        candidates = [video_input]
    elif isinstance(video_input, dict):
        candidates = [video_input.get(key) for key in ("name", "path", "data")]
    else:
        # Unsupported container type: nothing we know how to interpret.
        return None

    for value in candidates:
        # Skip missing entries and anything that cannot be a filesystem path
        # (numbers, nested dicts, ...) instead of letting Path() raise TypeError.
        if not value or not isinstance(value, (str, Path)):
            continue
        try:
            candidate = Path(value)
            if candidate.is_file():
                return candidate
        except (OSError, ValueError):
            # The string is not usable as a path at all (for example it is
            # longer than the OS allows for a filename); try the next one.
            continue
    return None
def run_prediction(
    video_input: Any,
    model: LipReadingModel,
    preprocessor: VideoPreprocessor,
) -> str:
    """
    Validate a Gradio video input and return the predicted text for it.

    Failures (no usable file, unreadable file, oversized file, prediction
    error) are reported as a message string instead of being raised, so the
    return value can always be displayed to the user.

    Args:
        video_input: Raw value from the Gradio Video component; resolved to a
            file path with ``_resolve_video_path``.
        model: Passed through unchanged to ``predict_from_video``.
        preprocessor: Passed through unchanged to ``predict_from_video``.

    Returns:
        The prediction coerced to ``str``, or a message explaining why no
        prediction was produced.
    """
    video_path = _resolve_video_path(video_input)
    if video_path is None:
        return "No video provided. Please upload or record a video."

    try:
        video_size_mb = video_path.stat().st_size / (1024 * 1024)
    except OSError as exc:
        # The file can disappear or become unreadable between resolution and
        # stat(); keep the traceback in the log for diagnosis.
        logger.exception("Error accessing video file: %s", exc)
        return f"Error accessing video file: {exc}"
    logger.info("Uploaded video size: %.2f MB", video_size_mb)

    if video_size_mb > config.MAX_VIDEO_SIZE_MB:
        return f"Video size exceeds {config.MAX_VIDEO_SIZE_MB} MB limit. Please upload a smaller video."

    logger.info("Running prediction for %s", video_path)
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    try:
        prediction = predict_from_video(
            video_path=str(video_path),
            model=model,
            preprocessor=preprocessor,
        )
    except Exception as exc:
        # UI boundary: surface the failure to the user instead of crashing the
        # request, but record the full traceback.
        logger.exception("Prediction error: %s", exc)
        return f"An error occurred during prediction: {exc}"

    total_time = time.perf_counter() - start_time
    logger.info("Prediction completed in %.2f seconds.", total_time)
    logger.info("Prediction result: %s", prediction)
    return prediction if isinstance(prediction, str) else str(prediction)
def create_app(model: LipReadingModel, preprocessor: VideoPreprocessor) -> gr.Blocks:
    """
    Assemble the Gradio Blocks interface for the lip-reading demo.

    The page has an "Upload Video" tab and a "Record Video" tab, each holding a
    video component, a button and a read-only textbox, followed by a collapsed
    "How to use" accordion and a footer. Both buttons call ``run_prediction``
    with the given ``model`` and ``preprocessor``.

    Returns:
        The Blocks app with its queue configured; the app is not launched here.
    """
    # NOTE(review): the block nesting below was reconstructed from a listing
    # without indentation -- confirm that the accordion, the event wiring and
    # the footer belong directly under the Blocks context.
    result_box_options = {
        "label": "Predicted text",
        "interactive": False,
        "lines": 4,
        "placeholder": "Prediction will appear here.",
    }
    usage_notes = (
        "\n"
        "**Upload video**\n"
        "- Select an MP4/AVI/MOV/MPG video that clearly shows the speaker's lips.\n"
        '- Click "Run prediction" to get the transcription.\n'
        "**Record video**\n"
        "- Allow webcam access if prompted.\n"
        '- Record, wait for the preview to appear, then click "Run prediction on recording".\n'
    )

    with gr.Blocks(title="Lip Reading App") as demo:
        gr.Markdown("# Lip Reading App")
        gr.Markdown(
            "Upload a short video or record with your webcam to generate a lip-reading transcription."
        )

        with gr.Tab("Upload Video"):
            video_input = gr.Video(
                label="Upload your video", sources=["upload"], format="mp4"
            )
            predict_button = gr.Button("Run prediction")
            prediction_output = gr.Textbox(**result_box_options)

        with gr.Tab("Record Video"):
            video_recorder = gr.Video(
                label="Record with webcam", sources=["webcam"], format="mp4"
            )
            predict_button_rec = gr.Button("Run prediction on recording")
            prediction_output_rec = gr.Textbox(**result_box_options)

        with gr.Accordion("How to use", open=False):
            gr.Markdown(usage_notes)

        # Each button gets its own lambda (as before), so handler names seen by
        # Gradio are unchanged; the lambda captures no loop variable.
        wiring = (
            (predict_button, video_input, prediction_output),
            (predict_button_rec, video_recorder, prediction_output_rec),
        )
        for button, source, target in wiring:
            button.click(
                fn=lambda video: run_prediction(video, model, preprocessor),
                inputs=source,
                outputs=target,
            )

        gr.Markdown("---\n(c) 2024 Lip Reading App.")

    demo.queue(max_size=4)
    return demo