userIdc2024 committed on
Commit
638b572
·
verified ·
1 Parent(s): 2727837

Upload 2 files

Browse files
generator_function/video_hashid_function.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ from typing import Optional
3
+ import hashlib, base64, binascii, math, subprocess, shutil, os, logging, uuid, tempfile
4
+ from dotenv import load_dotenv
5
+ from imageio_ffmpeg import get_ffmpeg_exe
6
+ import boto3
7
+
8
# Load variables from a .env file (python-dotenv) before the code below reads
# os.environ, then create the module-level logger used for error reporting.
+ load_dotenv()
9
+ logger = logging.getLogger(__name__)
10
+
11
+ # ---------- HASH UTILS ----------
12
def _sha256(path: Path, chunk: int = 1 << 20) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is consumed in pieces of ``chunk`` bytes (1 MiB by default), so
    the whole file never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(chunk)
            if piece == b"":
                # An empty read marks end of file.
                break
            digest.update(piece)
    return digest.hexdigest()
18
+
19
def _hashid_short(sha256_hex: str, length: int = 12) -> str:
    """Derive a short URL-safe identifier from a hexadecimal digest.

    The hex string is decoded to raw bytes, re-encoded as URL-safe base64,
    stripped of trailing ``=`` padding and cut to its first ``length``
    characters.
    """
    raw = binascii.a2b_hex(sha256_hex)
    encoded = base64.urlsafe_b64encode(raw).decode()
    unpadded = encoded.rstrip("=")
    return unpadded[:length]
22
+
23
+ # ---------- FFMPEG ----------
24
def _ffmpeg_bin() -> str:
    """Locate the ffmpeg executable.

    ``imageio_ffmpeg.get_ffmpeg_exe()`` is tried first. If it raises, the
    ``FFMPEG_BIN`` environment variable is consulted, and finally an
    ``ffmpeg`` found on ``PATH``.

    Raises:
        FileNotFoundError: if none of the lookups yields a path.
    """
    try:
        return get_ffmpeg_exe()
    except Exception:
        # get_ffmpeg_exe() failed: try the explicit override, then PATH.
        candidate = os.environ.get("FFMPEG_BIN")
        if not candidate:
            candidate = shutil.which("ffmpeg")
        if candidate:
            return candidate
        raise FileNotFoundError("ffmpeg not found. Install it or set FFMPEG_BIN")


# Resolved once at import time; _run_ffmpeg substitutes it for cmd[0].
FFMPEG = _ffmpeg_bin()
34
+
35
def _run_ffmpeg(cmd: list[str]) -> None:
    """Run an ffmpeg command line and raise if it exits with an error.

    The first element of ``cmd`` is only a placeholder: it is replaced by the
    module-level ``FFMPEG`` path. The caller's list is left untouched.

    Raises:
        RuntimeError: carrying ffmpeg's stderr text, or ``"ffmpeg failed"``
            when stderr is empty.
    """
    argv = cmd[:]
    argv[0] = FFMPEG
    completed = subprocess.run(argv, capture_output=True)
    if completed.returncode == 0:
        return
    message = completed.stderr.decode(errors="ignore")
    raise RuntimeError(message or "ffmpeg failed")
41
+
42
+ # ---------- THUMBNAIL ----------
43
def extract_thumbnail(video_path: str, time_position: str = "00:00:01") -> str:
    """Grab a single frame from a video and return it as a base64 JPEG.

    Args:
        video_path: Path of the video file handed to ffmpeg.
        time_position: Seek position passed to ffmpeg's ``-ss`` option;
            defaults to one second into the video.

    Returns:
        The JPEG bytes encoded as a base64 string.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status.
        FileNotFoundError: if ffmpeg reported success but wrote no output file.
    """
    # A private scratch directory gives every call its own output file: two
    # videos sharing a stem can no longer overwrite each other's thumbnail, a
    # stale JPEG from an earlier run can never be returned, and the file is
    # removed as soon as it has been encoded.
    with tempfile.TemporaryDirectory(prefix="thumb_") as scratch:
        thumb_path = Path(scratch) / f"{Path(video_path).stem}_thumb.jpg"
        cmd = [
            "ffmpeg", "-nostdin", "-y",
            "-ss", time_position,
            "-i", video_path,
            "-frames:v", "1",
            "-q:v", "2",
            str(thumb_path),
        ]
        _run_ffmpeg(cmd)

        if not thumb_path.is_file():
            raise FileNotFoundError(
                f"ffmpeg produced no thumbnail for {video_path!r} at {time_position}"
            )
        return base64.b64encode(thumb_path.read_bytes()).decode("utf-8")
62
+
63
+ # ---------- AUGMENT VIDEO ----------
64
def augment_video_random(
    *,
    input_path: str,
    output_path: Optional[str] = None,
    crf: int = 20,
    preset: str = "medium",
) -> str:
    """Re-encode a video with a small random colour perturbation.

    Four independent integer steps in ``[-5, 5]`` are drawn and mapped to a
    brightness offset (0.05 per step), a contrast factor (1 + 0.05 per step),
    a hue rotation (5 degrees per step) and a saturation factor
    (1 + 0.05 per step). The first video stream is encoded with libx264 as
    yuv420p; audio streams, when present, are copied unchanged.

    Args:
        input_path: Source video file.
        output_path: Directory that receives ``<stem>_augmented.mp4``; it is
            created if it does not exist. When omitted, the file is written
            next to the input.
        crf: Value passed to ffmpeg's ``-crf`` option.
        preset: Value passed to ffmpeg's ``-preset`` option.

    Returns:
        Path of the written file, as a string.

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status.
    """
    import random

    k_b, k_c, k_h, k_s = (random.randint(-5, 5) for _ in range(4))
    # With steps limited to [-5, 5] these stay within [-0.25, 0.25] and
    # [0.75, 1.25], so no clamping is required.
    brightness = k_b * 0.05
    contrast = 1.0 + k_c * 0.05
    saturation = 1.0 + k_s * 0.05
    hue_degrees = k_h * 5.0

    # The hue filter's lowercase ``h`` option is expressed in degrees (the
    # uppercase ``H`` option is the radian form), so the angle is passed in
    # degrees directly.
    vf = (
        f"hue=h={hue_degrees:.1f}:s={saturation:.4f},"
        f"eq=contrast={contrast:.4f}:brightness={brightness:.4f}"
    )

    inp = Path(input_path)
    out_name = f"{inp.stem}_augmented.mp4"
    if output_path:
        out_dir = Path(output_path)
        # ffmpeg does not create missing directories for its output file.
        out_dir.mkdir(parents=True, exist_ok=True)
        out = out_dir / out_name
    else:
        out = inp.with_name(out_name)

    cmd = [
        "ffmpeg", "-nostdin", "-y",
        "-i", str(inp),
        "-map", "0:v:0", "-map", "0:a?",
        "-vf", vf,
        "-c:v", "libx264", "-preset", preset, "-crf", str(crf),
        "-pix_fmt", "yuv420p",
        "-c:a", "copy",
        "-movflags", "+faststart",
        str(out),
    ]
    _run_ffmpeg(cmd)
    return str(out)
96
+
97
+ # ---------- VIDEO UPLOAD TO R2 ----------
98
def upload_video_to_r2(video_bytes: bytes, file_name: str) -> str:
    """Upload video bytes to the configured S3-compatible bucket.

    The object is stored under ``videos/<uuid4>_<file_name>`` with content
    type ``video/mp4``. Connection settings come from the ``R2_ENDPOINT``,
    ``R2_ACCESS_KEY``, ``R2_SECRET_KEY`` and ``R2_BUCKET_NAME`` environment
    variables; the returned URL is built from ``NEW_BASE``.

    Args:
        video_bytes: Raw content of the video.
        file_name: Name appended to the generated object key.

    Returns:
        ``NEW_BASE`` (without trailing slashes) joined with the object key.

    Raises:
        RuntimeError: if ``R2_BUCKET_NAME`` or ``NEW_BASE`` is not set.
    """
    bucket = os.getenv("R2_BUCKET_NAME")
    public_base = os.getenv("NEW_BASE")

    # Validate before any network call: previously a missing NEW_BASE only
    # surfaced as an AttributeError after the object had been uploaded.
    required = (("R2_BUCKET_NAME", bucket), ("NEW_BASE", public_base))
    missing = [name for name, value in required if value is None]
    if missing:
        raise RuntimeError(
            "Missing required environment variable(s): " + ", ".join(missing)
        )

    s3 = boto3.client(
        "s3",
        endpoint_url=os.getenv("R2_ENDPOINT"),
        aws_access_key_id=os.getenv("R2_ACCESS_KEY"),
        aws_secret_access_key=os.getenv("R2_SECRET_KEY"),
        region_name="auto",
    )

    # The random prefix keeps uploads with the same file name apart.
    key = f"videos/{uuid.uuid4()}_{file_name}"
    s3.put_object(Bucket=bucket, Key=key, Body=video_bytes, ContentType="video/mp4")

    return f"{public_base.rstrip('/')}/{key}"
113
+
114
+ # ---------- PROCESS + UPLOAD ----------
115
def process_video_with_hash_info(input_path: str, output_path: Optional[str] = None) -> dict:
    """Augment a video, then hash, thumbnail and upload the result.

    Steps: compute the short hash id of the input, create the randomly
    augmented copy, compute the short hash id of that copy, place the copy in
    the system temp directory, extract a thumbnail and upload the video.
    Thumbnail and upload failures are logged and do not abort the call.

    Args:
        input_path: Source video file.
        output_path: Forwarded to ``augment_video_random`` as its
            ``output_path`` argument.

    Returns:
        A dict with the keys ``input_hashid``, ``output_name``,
        ``output_path`` (the copy in the temp directory), ``output_hashid``,
        ``output_r2_url`` (``None`` when the upload failed) and ``thumbnail``
        (base64 JPEG, ``""`` when extraction failed).
    """
    in_id = _hashid_short(_sha256(Path(input_path)))

    out_path = augment_video_random(input_path=input_path, output_path=output_path)
    out_id = _hashid_short(_sha256(Path(out_path)))

    # Work from a copy in the temp directory.
    safe_tmp = Path(tempfile.gettempdir()) / Path(out_path).name
    try:
        shutil.copy(out_path, safe_tmp)
    except shutil.SameFileError:
        # The augmented file was already written to the temp directory under
        # this very name, so there is nothing to copy.
        pass

    try:
        thumbnail_b64 = extract_thumbnail(str(safe_tmp))
    except Exception as exc:
        # Best effort: a missing thumbnail must not fail the whole request.
        logger.exception("Thumbnail extraction failed: %s", exc)
        thumbnail_b64 = ""

    try:
        r2_url = upload_video_to_r2(safe_tmp.read_bytes(), safe_tmp.name)
    except Exception as exc:
        # Best effort as well; callers see the failure as a None URL.
        logger.exception("Upload to R2 failed: %s", exc)
        r2_url = None

    return {
        "input_hashid": in_id,
        "output_name": safe_tmp.name,
        "output_path": str(safe_tmp),
        "output_hashid": out_id,
        "output_r2_url": r2_url,
        "thumbnail": thumbnail_b64,
    }
generator_function/video_text_generator.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, uuid, subprocess, boto3, replicate
2
+ from typing import Dict, Any
3
+ from dotenv import load_dotenv
4
+ # from helpers_function.helpers import upload_to_r2
5
+
6
# Load variables from a .env file (python-dotenv) so REPLICATE_API_KEY can be
# read from the environment below.
+ load_dotenv()
7
+
8
# Module-wide Replicate client, created at import time with the token taken
# from the REPLICATE_API_KEY environment variable.
+ replicate_client = replicate.Client(api_token=os.getenv("REPLICATE_API_KEY"))
9
+
10
def video_to_audio(video_path: str) -> str:
    """Extract the audio of a video into a new MP3 file using ffmpeg.

    The file is created in the current working directory under a random
    ``<uuid4>.mp3`` name; the caller is responsible for deleting it.

    Args:
        video_path: Path of the video file handed to ffmpeg.

    Returns:
        The name of the MP3 file that was written.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        OSError: if the ``ffmpeg`` executable cannot be started.
    """
    audio_filename = f"{uuid.uuid4()}.mp3"
    command = [
        # -nostdin keeps ffmpeg from reading the parent's standard input.
        "ffmpeg", "-nostdin", "-hide_banner", "-loglevel", "error",
        "-i", video_path, "-vn", "-acodec", "libmp3lame", "-y", audio_filename,
    ]
    try:
        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError):
        # Do not leave a partial output file behind when extraction fails.
        try:
            os.remove(audio_filename)
        except OSError:
            # Nothing was written (or it cannot be removed); the original
            # failure re-raised below is the error that matters.
            pass
        raise
    return audio_filename
19
+
20
def extract_text_from_video(video_path: str, max_duration: int = 60) -> Dict[str, Any]:
    """Run the Replicate ``seamless_communication`` model on a video's audio.

    Steps:
        1. Extract the audio track to a temporary MP3 (``video_to_audio``).
        2. Send the open audio file to the Replicate model, with English as
           the target language for both text and speech.
        3. Delete the temporary MP3, whether or not the model call succeeded.

    Args:
        video_path: Path of the video file.
        max_duration: Passed to the model as ``max_input_audio_length``.

    Returns:
        Whatever the model run returns, or ``{"error": "No output"}`` when
        that result is empty.
    """
    # NOTE(review): the task name requests speech-to-speech translation while
    # this function's name suggests text extraction -- confirm the model
    # output carries the text callers expect.
    audio_file = video_to_audio(video_path)

    try:
        with open(audio_file, "rb") as audio_stream:
            result = replicate_client.run(
                "cjwbw/seamless_communication:668a4fec05a887143e5fe8d45df25ec4c794dd43169b9a11562309b2d45873b0",
                input={
                    "task_name": "S2ST (Speech to Speech translation)",
                    "input_audio": audio_stream,
                    "input_text_language": "None",
                    "max_input_audio_length": max_duration,
                    "target_language_text_only": "English",
                    "target_language_with_speech": "English",
                },
            )
    finally:
        try:
            os.remove(audio_file)
        except OSError:
            # Cleanup is best effort; a leftover temp file must not mask the
            # model result or the model error.
            pass

    return result or {"error": "No output"}