Spaces:

thienphuc12339
/

LipNet

Runtime error

File size: 4,567 Bytes

6d5d850

import logging
import time
from pathlib import Path
from typing import Any, Optional

import gradio as gr

from . import config
from .model import LipReadingModel, predict_from_video
from .preprocessing import VideoPreprocessor

logger = logging.getLogger(__name__)


def _resolve_video_path(video_input: Any) -> Optional[Path]:
    """
    Normalize the value of a Gradio Video component to a path of an existing file.

    Gradio's Video component can return a filepath or a dict depending on version.

    Args:
        video_input: A ``str``/``Path`` filepath, or a dict whose ``"name"``,
            ``"path"`` or ``"data"`` entry may hold a filepath. Any other
            value (including falsy ones) is treated as "no video".

    Returns:
        The first candidate that points to an existing regular file, or
        ``None`` when no candidate does.
    """
    if not video_input:
        return None

    if isinstance(video_input, (str, Path)):
        candidates = (video_input,)
    elif isinstance(video_input, dict):
        candidates = tuple(video_input.get(key) for key in ("name", "path", "data"))
    else:
        return None

    for value in candidates:
        if not value:
            continue
        try:
            candidate = Path(value)
            if candidate.is_file():
                return candidate
        except (TypeError, ValueError, OSError):
            # The entry is not usable as a path: a non-path object
            # (TypeError), or a string the OS rejects such as an over-long
            # inline payload (OSError) or one with an embedded NUL
            # (ValueError). Treat it as "not a file" and try the next key
            # instead of crashing the caller.
            continue
    return None


def run_prediction(
    video_input: Any,
    model: LipReadingModel,
    preprocessor: VideoPreprocessor,
) -> str:
    """
    Validate a video coming from Gradio, run the model on it and return text.

    Every failure mode is reported as a human-readable string rather than
    raised, so the return value can be shown directly in the output Textbox.

    Args:
        video_input: Raw value of the Gradio Video component; it is resolved
            to a file path with ``_resolve_video_path``.
        model: Model instance forwarded to ``predict_from_video``.
        preprocessor: Preprocessor instance forwarded to ``predict_from_video``.

    Returns:
        The prediction converted to ``str``, or a message describing why no
        prediction was produced (no video, unreadable file, file larger than
        ``config.MAX_VIDEO_SIZE_MB``, or an error raised during prediction).
    """
    video_path = _resolve_video_path(video_input)
    if video_path is None:
        return "No video provided. Please upload or record a video."

    try:
        video_size_mb = video_path.stat().st_size / (1024 * 1024)
    except OSError as exc:
        # The file can disappear or become unreadable between resolution and
        # stat(); keep the traceback in the log and tell the user.
        logger.exception("Error accessing video file: %s", exc)
        return f"Error accessing video file: {exc}"
    logger.info("Uploaded video size: %.2f MB", video_size_mb)

    if video_size_mb > config.MAX_VIDEO_SIZE_MB:
        return f"Video size exceeds {config.MAX_VIDEO_SIZE_MB} MB limit. Please upload a smaller video."

    logger.info("Running prediction for %s", video_path)
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    try:
        prediction = predict_from_video(
            video_path=str(video_path),
            model=model,
            preprocessor=preprocessor,
        )
    except Exception as exc:
        # UI boundary: any failure in the prediction call is turned into a
        # message for the user; logger.exception records the traceback.
        logger.exception("Prediction error: %s", exc)
        return f"An error occurred during prediction: {exc}"

    total_time = time.perf_counter() - start_time
    logger.info("Prediction completed in %.2f seconds.", total_time)
    logger.info("Prediction result: %s", prediction)

    return prediction if isinstance(prediction, str) else str(prediction)


def create_app(model: LipReadingModel, preprocessor: VideoPreprocessor) -> gr.Blocks:
    """
    Assemble the Gradio Blocks interface for the lip-reading demo.

    The layout consists of an "Upload Video" tab and a "Record Video" tab
    (each with a video input, a button and a read-only result box), a
    collapsed "How to use" accordion and a footer. Each button calls
    ``run_prediction`` with the given ``model`` and ``preprocessor``.

    Args:
        model: Model instance passed to ``run_prediction`` on every click.
        preprocessor: Preprocessor instance passed to ``run_prediction`` on
            every click.

    Returns:
        The ``gr.Blocks`` app, with its queue configured with ``max_size=4``.
    """
    usage_text = """
**Upload video**
- Select an MP4/AVI/MOV/MPG video that clearly shows the speaker's lips.
- Click "Run prediction" to get the transcription.

**Record video**
- Allow webcam access if prompted.
- Record, wait for the preview to appear, then click "Run prediction on recording".
"""

    def _prediction_tab(tab_title, video_label, source, button_label):
        # Build one tab (video input, trigger button, result box) and hand the
        # three components back so the click handler can be wired afterwards.
        with gr.Tab(tab_title):
            video_component = gr.Video(
                label=video_label,
                sources=[source],
                format="mp4",
            )
            trigger = gr.Button(button_label)
            result_box = gr.Textbox(
                label="Predicted text",
                interactive=False,
                lines=4,
                placeholder="Prediction will appear here.",
            )
        return video_component, trigger, result_box

    with gr.Blocks(title="Lip Reading App") as demo:
        gr.Markdown("# Lip Reading App")
        gr.Markdown(
            "Upload a short video or record with your webcam to generate a lip-reading transcription."
        )

        upload_widgets = _prediction_tab(
            "Upload Video", "Upload your video", "upload", "Run prediction"
        )
        record_widgets = _prediction_tab(
            "Record Video",
            "Record with webcam",
            "webcam",
            "Run prediction on recording",
        )

        with gr.Accordion("How to use", open=False):
            gr.Markdown(usage_text)

        # Wire the upload tab first, then the recording tab; each button gets
        # its own handler bound to the shared model and preprocessor.
        for source_video, trigger, result_box in (upload_widgets, record_widgets):
            trigger.click(
                fn=lambda video: run_prediction(video, model, preprocessor),
                inputs=source_video,
                outputs=result_box,
            )

        gr.Markdown("---\n(c) 2024 Lip Reading App.")

    demo.queue(max_size=4)
    return demo