Spaces:
Runtime error
Runtime error
File size: 4,567 Bytes
6d5d850 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | import logging
import time
from pathlib import Path
from typing import Any, Optional
import gradio as gr
from . import config
from .model import LipReadingModel, predict_from_video
from .preprocessing import VideoPreprocessor
logger = logging.getLogger(__name__)
def _resolve_video_path(video_input: Any) -> Optional[Path]:
    """
    Normalize the value produced by Gradio's Video component to a file path.

    Gradio's Video component can return a filepath or a dict depending on
    version. A string or ``Path`` is used directly; for a dict, the keys
    ``"name"``, ``"path"`` and ``"data"`` are tried in that order.

    Args:
        video_input: Raw value received from the Gradio component.

    Returns:
        A ``Path`` to an existing regular file, or ``None`` when the input is
        empty, of an unsupported type, or does not point at an existing file.
        This function does not raise for malformed candidate values.
    """

    def _existing_file(value: Any) -> Optional[Path]:
        # Path() raises TypeError for values that are not path-like (ints,
        # bytes, nested dicts), and is_file() can raise OSError/ValueError on
        # some Python versions (e.g. an over-long inline payload). Treat all
        # of these as "not a usable file" so the caller can try the next key.
        try:
            candidate = Path(value)
            return candidate if candidate.is_file() else None
        except (TypeError, ValueError, OSError):
            return None

    if not video_input:
        return None
    if isinstance(video_input, (str, Path)):
        return _existing_file(video_input)
    if isinstance(video_input, dict):
        for key in ("name", "path", "data"):
            value = video_input.get(key)
            if value:
                resolved = _existing_file(value)
                if resolved is not None:
                    return resolved
    return None
def run_prediction(
    video_input: Any,
    model: LipReadingModel,
    preprocessor: VideoPreprocessor,
) -> str:
    """
    Validate a Gradio video input, run prediction on it, and return text.

    File-access and prediction failures are not raised to the caller: they
    are logged (with traceback) and reported as a message string, so the
    result can always be shown in the UI.

    Args:
        video_input: Raw value from the Gradio Video component.
        model: Model forwarded to ``predict_from_video``.
        preprocessor: Preprocessor forwarded to ``predict_from_video``.

    Returns:
        The prediction converted to ``str``, or a message explaining why no
        prediction was produced (missing video, unreadable file, file larger
        than ``config.MAX_VIDEO_SIZE_MB``, or an error during prediction).
    """
    video_path = _resolve_video_path(video_input)
    if not video_path:
        return "No video provided. Please upload or record a video."

    # Only the stat() call can fail here, so keep the guarded region minimal.
    try:
        video_size_mb = video_path.stat().st_size / (1024 * 1024)
    except OSError as exc:
        logger.exception("Error accessing video file: %s", exc)
        return f"Error accessing video file: {exc}"
    logger.info("Uploaded video size: %.2f MB", video_size_mb)

    if video_size_mb > config.MAX_VIDEO_SIZE_MB:
        return f"Video size exceeds {config.MAX_VIDEO_SIZE_MB} MB limit. Please upload a smaller video."

    logger.info("Running prediction for %s", video_path)
    # perf_counter() is monotonic; time.time() can jump if the clock is adjusted.
    start_time = time.perf_counter()
    try:
        prediction = predict_from_video(
            video_path=str(video_path),
            model=model,
            preprocessor=preprocessor,
        )
    except Exception as exc:
        # UI boundary: any failure becomes a user-visible message, but the
        # full traceback is kept in the log for debugging.
        logger.exception("Prediction error: %s", exc)
        prediction = f"An error occurred during prediction: {exc}"
    else:
        total_time = time.perf_counter() - start_time
        logger.info("Prediction completed in %.2f seconds.", total_time)
        logger.info("Prediction result: %s", prediction)

    return prediction if isinstance(prediction, str) else str(prediction)
def create_app(model: LipReadingModel, preprocessor: VideoPreprocessor) -> gr.Blocks:
    """
    Build the Gradio Blocks UI for the lip-reading app.

    The layout has an upload tab and a webcam-recording tab, each with its
    own video component, button, and read-only output textbox, plus a
    collapsed "How to use" section. Both buttons call ``run_prediction``
    with the given ``model`` and ``preprocessor``.

    Args:
        model: Model passed through to ``run_prediction``.
        preprocessor: Preprocessor passed through to ``run_prediction``.

    Returns:
        The configured ``gr.Blocks`` instance with its queue enabled
        (``max_size=4``); the caller is responsible for launching it.
    """

    def _predict(video):
        # One handler shared by both tabs; binds model and preprocessor.
        return run_prediction(video, model, preprocessor)

    # Settings common to both result textboxes.
    output_box_options = dict(
        label="Predicted text",
        interactive=False,
        lines=4,
        placeholder="Prediction will appear here.",
    )

    usage_text = (
        "\n"
        "**Upload video**\n"
        "- Select an MP4/AVI/MOV/MPG video that clearly shows the speaker's lips.\n"
        '- Click "Run prediction" to get the transcription.\n'
        "**Record video**\n"
        "- Allow webcam access if prompted.\n"
        '- Record, wait for the preview to appear, then click "Run prediction on recording".\n'
    )

    with gr.Blocks(title="Lip Reading App") as demo:
        gr.Markdown("# Lip Reading App")
        gr.Markdown(
            "Upload a short video or record with your webcam to generate a lip-reading transcription."
        )

        with gr.Tab("Upload Video"):
            video_input = gr.Video(
                label="Upload your video",
                sources=["upload"],
                format="mp4",
            )
            predict_button = gr.Button("Run prediction")
            prediction_output = gr.Textbox(**output_box_options)

        with gr.Tab("Record Video"):
            video_recorder = gr.Video(
                label="Record with webcam",
                sources=["webcam"],
                format="mp4",
            )
            predict_button_rec = gr.Button("Run prediction on recording")
            prediction_output_rec = gr.Textbox(**output_box_options)

        with gr.Accordion("How to use", open=False):
            gr.Markdown(usage_text)

        # Wire each button to its own video source and output box.
        predict_button.click(
            fn=_predict,
            inputs=video_input,
            outputs=prediction_output,
        )
        predict_button_rec.click(
            fn=_predict,
            inputs=video_recorder,
            outputs=prediction_output_rec,
        )

        gr.Markdown("---\n(c) 2024 Lip Reading App.")

    demo.queue(max_size=4)
    return demo
|