Spaces:
Sleeping
Sleeping
Commit ·
09a8733
1
Parent(s): f0fa4f3
updated fix for mp4 videos
Browse files
app.py
CHANGED
|
@@ -13,9 +13,14 @@ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtra
|
|
| 13 |
TEMP_VIDEO = "temp_video.mp4"
|
| 14 |
RAW_AUDIO = "raw_audio_input"
|
| 15 |
CONVERTED_AUDIO = "converted_audio.wav"
|
| 16 |
-
MODEL_DIR = "model"
|
| 17 |
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
MODEL_REPO = "ylacombe/accent-classifier"
|
| 20 |
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir="hf_model_cache")
|
| 21 |
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO)
|
|
@@ -28,12 +33,45 @@ LABELS = [model.config.id2label[i] for i in range(len(model.config.id2label))]
|
|
| 28 |
|
| 29 |
# === Download video from URL ===
|
| 30 |
def download_video(url, filename=TEMP_VIDEO):
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
# === Extract audio from video ===
|
| 39 |
def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):
|
|
|
|
| 13 |
# Scratch file names used by the download / extraction / conversion steps.
TEMP_VIDEO = "temp_video.mp4"
RAW_AUDIO = "raw_audio_input"
CONVERTED_AUDIO = "converted_audio.wav"

# === Load local model (alternative, currently disabled) ===
# MODEL_DIR = "model"
# model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_DIR, local_files_only=True)
# feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_DIR)

# === Load model and feature extractor from the Hugging Face Hub ===
MODEL_REPO = "ylacombe/accent-classifier"
# Use one cache directory for both artifacts so the model weights and the
# feature-extractor config are stored (and reused) in the same place.
MODEL_CACHE_DIR = "hf_model_cache"
model = Wav2Vec2ForSequenceClassification.from_pretrained(MODEL_REPO, cache_dir=MODEL_CACHE_DIR)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_REPO, cache_dir=MODEL_CACHE_DIR)
|
|
|
|
| 33 |
|
| 34 |
# === Download video from URL ===
|
| 35 |
def download_video(url, filename=TEMP_VIDEO):
    """Download a video from *url* and remux it with ffmpeg into *filename*.

    The response body is streamed to a scratch file, which ffmpeg then
    stream-copies (no re-encoding) into *filename*. The scratch file is
    removed on both success and failure.

    Args:
        url: Address of the video. The server must answer with a
            ``video/*`` Content-Type.
        filename: Path of the remuxed output file.

    Returns:
        The value of *filename*.

    Raises:
        RuntimeError: If the request fails, the response is not a video,
            or ffmpeg cannot produce a non-empty output file.
    """
    temp_download = "raw_download.mp4"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }

    try:
        try:
            # With stream=True the connection stays open until the response
            # is closed, so hold it in a context manager.
            with requests.get(url, headers=headers, stream=True, timeout=15) as r:
                r.raise_for_status()

                content_type = r.headers.get("Content-Type", "")
                if not content_type.startswith("video/"):
                    raise RuntimeError(f"URL does not point to a video file. Content-Type: {content_type}")

                with open(temp_download, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
        except Exception as e:
            # Keep the original exception as the cause for debugging.
            raise RuntimeError(f"Failed to download video: {e}") from e

        # Attempt to fix the file with ffmpeg
        repaired_file = filename
        ffmpeg_cmd = [
            "ffmpeg", "-y", "-i", temp_download,
            "-c", "copy", "-movflags", "+faststart", repaired_file
        ]
        result = subprocess.run(ffmpeg_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        if result.returncode != 0 or not os.path.exists(repaired_file) or os.path.getsize(repaired_file) == 0:
            # ffmpeg output is not guaranteed to be valid UTF-8.
            print(result.stderr.decode(errors="replace"))
            raise RuntimeError("FFmpeg failed to process the video. File may not be a valid MP4.")

        return repaired_file
    finally:
        # Never leave the scratch download behind, whatever the outcome.
        try:
            os.remove(temp_download)
        except FileNotFoundError:
            pass
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
|
| 75 |
|
| 76 |
# === Extract audio from video ===
|
| 77 |
def extract_audio_from_video(video_path, output_path=RAW_AUDIO + ".mp4"):
|
test.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
|
| 3 |
+
# Diagnostic script: fetch a sample MP4 URL and print what the server
# actually returns (content type, size, and the first bytes of the body).
url = "https://store3.gofile.io/download/web/7a1f0c47-93e5-45c1-90b3-e05cb8611501/sample-file.mp4"
# requests has no default timeout; set one so an unresponsive host cannot
# hang the script forever.
r = requests.get(url, allow_redirects=True, timeout=30)

print("Content-Type:", r.headers.get("Content-Type"))
print("File size (bytes):", len(r.content))
print("First 200 bytes:\n", r.content[:200])
|