Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,9 +9,8 @@ import requests
|
|
| 9 |
import json
|
| 10 |
import arabic_reshaper # pip install arabic-reshaper
|
| 11 |
from bidi.algorithm import get_display # pip install python-bidi
|
| 12 |
-
from moviepy import
|
| 13 |
import pysrt
|
| 14 |
-
import instaloader
|
| 15 |
import time
|
| 16 |
import re
|
| 17 |
import concurrent.futures
|
|
@@ -20,206 +19,6 @@ api_key = "268976:66f4f58a2a905"
|
|
| 20 |
|
| 21 |
|
| 22 |
|
| 23 |
-
|
| 24 |
-
def fetch_data(url):
    """Fetch *url* with an HTTP GET and return the decoded JSON body.

    Returns None, after printing the error, when the request fails or the
    server answers with an error status.
    """
    try:
        # A timeout keeps a stalled API call from blocking the app forever;
        # a timeout surfaces as a RequestException and is handled below.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None
|
| 32 |
-
|
| 33 |
-
def download_file(url):
    """Download a file described by a ``"<source-url>#<filename>"`` string.

    The part before the first ``#`` is fetched and streamed to the file named
    by the part after it. Request errors are printed, not raised.

    Raises:
        ValueError: if *url* carries no ``#<filename>`` suffix.
    """
    # Split once; everything after the first '#' is the target filename, so a
    # filename that itself contains '#' is kept intact.
    source, _, filename = url.partition("#")
    if not filename:
        raise ValueError(f"Expected '<url>#<filename>', got: {url!r}")

    try:
        # The context manager releases the streamed connection even when
        # writing the file fails part-way.
        with requests.get(source, stream=True, timeout=30) as response:
            response.raise_for_status()
            print(filename)
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        print(f"Downloaded successfully: {filename}")
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
|
| 45 |
-
|
| 46 |
-
def download_chunk(url, start, end, filename, index):
    """Download one byte range of *url* into ``<filename>.part<index>``.

    Args:
        url: address of the file to fetch.
        start: first byte offset requested (sent in the ``Range`` header).
        end: last byte offset requested (sent in the ``Range`` header).
        filename: base name of the final file; the part is written next to it.
        index: ordinal of this part, used in the part file's suffix.

    Returns:
        The name of the part file that was written.

    Raises:
        requests.exceptions.HTTPError: if the server answers with an error
            status (via ``raise_for_status``).
    """
    headers = {'Range': f'bytes={start}-{end}'}
    chunk_filename = f'{filename}.part{index}'
    # NOTE(review): a server that ignores Range may send the whole file with
    # status 200; this is not detected here -- confirm the API honours Range.
    with requests.get(url, headers=headers, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(chunk_filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    file.write(chunk)
    return chunk_filename
|
| 56 |
-
|
| 57 |
-
def merge_files(filename, num_parts):
    """Concatenate ``<filename>.part0`` .. ``.part<num_parts-1>`` into *filename*.

    Parts are appended in index order. The part files are left on disk;
    callers that want them gone must remove them afterwards.

    Raises:
        OSError: if a part file is missing or the output cannot be written.
    """
    import shutil  # local import: only this helper needs it

    with open(filename, 'wb') as output_file:
        for i in range(num_parts):
            part_filename = f'{filename}.part{i}'
            with open(part_filename, 'rb') as part_file:
                # Copy in buffered blocks instead of reading a whole part
                # (potentially hundreds of MB of video) into memory.
                shutil.copyfileobj(part_file, output_file)
|
| 65 |
-
|
| 66 |
-
def download_file_in_parallel(link, size, num_threads=4):
    """Download a file in concurrent byte ranges and stitch the parts together.

    Args:
        link: ``"<source-url>#<filename>"``; the part after the first ``#`` is
            the local file to create.
        size: total size of the remote file in bytes.
        num_threads: maximum number of ranges fetched concurrently.

    Raises:
        ValueError: if *link* has no filename part or *size* is not positive.
    """
    url, _, filename = link.partition("#")
    if not filename:
        raise ValueError(f"Expected '<url>#<filename>', got: {link!r}")
    if not size or size <= 0:
        # Without a positive size no valid byte range can be computed.
        raise ValueError(f"size must be a positive byte count, got: {size!r}")
    print(url+" filename: "+filename)

    # Never use more workers than there are bytes, otherwise chunk_size would
    # be 0 and the ranges would come out as (0, -1).
    num_threads = max(1, min(num_threads, size))
    chunk_size = size // num_threads

    ranges = [(i * chunk_size, (i + 1) * chunk_size - 1) for i in range(num_threads)]
    ranges[-1] = (ranges[-1][0], size - 1)  # Last range absorbs the remainder

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(download_chunk, url, start, end, filename, i)
            for i, (start, end) in enumerate(ranges)
        ]
        for future in concurrent.futures.as_completed(futures):
            future.result()  # Re-raises any exception from a worker

    merge_files(filename, num_threads)
    print(f'Downloaded successfully: {filename}')
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
def one_youtube(link, api_key):
    """Download the 360p video and "medium" audio of a YouTube link via one-api.ir.

    Resolves the video id, reads the format list, asks the API for download
    links for both formats and fetches them with download_file_in_parallel.

    Returns:
        ``(video_name, audio_name)`` -- the local file names -- or
        ``(None, None)`` when any API call fails or a required format is
        not offered.
    """
    # Fetch video ID
    video_id_url = f"https://one-api.ir/youtube/?token={api_key}&action=getvideoid&link={link}"
    video_data = fetch_data(video_id_url)
    if not video_data:
        return None, None

    video_id = video_data["result"]

    # Fetch video data
    filter_option = ""  # Replace with your filter option
    video_data_url = f"https://youtube.one-api.ir/?token={api_key}&action=fullvideo&id={video_id}&filter={filter_option}"
    video_data_2 = fetch_data(video_data_url)
    if not video_data_2:
        return None, None

    formats_list = video_data_2["result"]["formats"]
    file_name = video_data_2["result"]["title"]
    video_name = f'{file_name}.mp4'
    audio_name = f'{file_name}.mp3'

    # Initialise up front: without this, a format list lacking "360p" or
    # "medium" left these names unbound and the check below raised instead of
    # returning (None, None).
    download_id = audio_id = None
    video_size = audio_size = None
    for fmt in formats_list:
        note = fmt["format_note"]
        if note == "360p":
            download_id = fmt["id"]
            video_size = fmt["filesize"]
        elif note == "medium":
            audio_id = fmt["id"]
            audio_size = fmt["filesize"]

    if not download_id or not audio_id:
        return None, None

    # Fetch video and audio links
    video_link_url = f"https://youtube.one-api.ir/?token={api_key}&action=download&id={download_id}"
    audio_link_url = f"https://youtube.one-api.ir/?token={api_key}&action=download&id={audio_id}"
    video_link_data = fetch_data(video_link_url)
    audio_link_data = fetch_data(audio_link_url)
    if not video_link_data or not audio_link_data:
        return None, None

    video_link = video_link_data["result"]["link"]
    audio_link = audio_link_data["result"]["link"]
    # The downloader takes "<url>#<filename>" as a single string.
    vid_str = video_link + "#" + video_name
    audio_str = audio_link + "#" + audio_name

    # Download video and audio files
    print(video_size , audio_size)
    download_file_in_parallel(vid_str, video_size)
    download_file_in_parallel(audio_str, audio_size)

    return video_name, audio_name
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
# Define your functions here
|
| 145 |
-
def yt_download(url):
    """Download the highest-resolution stream of a YouTube video.

    Returns:
        ``(video_path, title)`` where ``video_path`` is ``"<title>.mp4"``.

    Raises:
        ValueError: if no stream is available for the video.
    """
    yt = YouTube(url)
    title = yt.title
    print(title)
    # NOTE(review): the returned path assumes the library saves the file under
    # the raw title; titles with characters the library sanitises may end up
    # under a different name -- confirm against the downloader's behaviour.
    video_path = f"{title}.mp4"
    ys = yt.streams.get_highest_resolution()
    print(ys)
    if ys is None:
        raise ValueError(f"No downloadable stream found for {url!r}")
    ys.download()
    return video_path, title
|
| 153 |
-
|
| 154 |
-
def insta_oneapi(url, api_key):
    """Download the first media item of an Instagram post via one-api.ir.

    The shortcode is taken from the last path segment of *url*; the media is
    saved as ``video.mp4`` in the working directory.

    Returns:
        ``"video.mp4"`` on success, otherwise None (the error is printed).
    """
    # Ignore a query string and trailing slash so links such as
    # ".../reel/<code>/?igshid=..." still yield the shortcode.
    shortcode = url.split("?")[0].rstrip("/").split("/")[-1]
    print(shortcode)
    url_one = "https://api.one-api.ir/instagram/v1/post/?shortcode=" + shortcode
    headers = {"one-api-token": api_key, "Content-Type": "application/json"}
    response = requests.get(url_one, headers=headers, timeout=30)
    print(response)
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    result = response.json()
    try:
        # NOTE(review): fixed 10 s pause kept from the original; its purpose
        # (rate limiting? link propagation?) is not visible here.
        time.sleep(10)
        media_url = result["result"]['media'][0]["url"]
        with requests.get(media_url, stream=True, timeout=30) as media:
            media.raise_for_status()
            with open("video.mp4", 'wb') as file:
                for chunk in media.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
        print("Downloaded successfully")
        return "video.mp4"
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
    except (KeyError, IndexError, TypeError) as e:
        # The API answered 200 but without the expected media list.
        print(f"An error occurred: unexpected API response ({e!r})")
    return None
|
| 180 |
-
|
| 181 |
-
def insta_download(permalink):
    """Download the video of an Instagram reel/post permalink with Instaloader.

    The shortcode is extracted from ``instagram.com/reel/<code>`` or
    ``instagram.com/p/<code>``; the file is saved under the name found at the
    end of the video URL (query string removed).

    Returns:
        The local filename on success, or None when anything fails (the
        failure is printed; this function is deliberately best-effort).
    """
    # Create an instance of Instaloader
    L = instaloader.Instaloader()

    try:
        # Extract the shortcode from the permalink
        if "instagram.com/reel/" in permalink:
            shortcode = permalink.split("instagram.com/reel/")[-1].split("/")[0]
        elif "instagram.com/p/" in permalink:
            shortcode = permalink.split("instagram.com/p/")[-1].split("/")[0]
        else:
            raise ValueError("Invalid permalink format")

        # Load the post using the shortcode
        post = instaloader.Post.from_shortcode(L.context, shortcode)

        # Check if the post is a video
        if not post.is_video:
            raise ValueError("The provided permalink is not a video.")

        video_url = post.video_url

        # Last path segment of the URL, without query parameters
        filename = video_url.split("/")[-1].split("?")[0]

        # Stream the video to disk; the context manager releases the
        # connection even if writing fails.
        with requests.get(video_url, stream=True, timeout=30) as response:
            response.raise_for_status()  # Raise an error for bad responses
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)

        print(f"Downloaded video {filename} successfully.")
        return filename
    except Exception as e:
        print(f"Failed to download video from {permalink}: {e}")
        return None
|
| 222 |
-
|
| 223 |
def extract_audio(input_video_name):
|
| 224 |
# Define the input video file and output audio file
|
| 225 |
mp3_file = "audio.mp3"
|
|
@@ -240,13 +39,59 @@ def extract_audio(input_video_name):
|
|
| 240 |
print("Audio extraction successful!")
|
| 241 |
return mp3_file
|
| 242 |
|
| 243 |
-
def transcribe(audio):
|
| 244 |
-
model = WhisperModel("tiny")
|
| 245 |
-
segments, info = model.transcribe(audio)
|
| 246 |
-
segments = list(segments)
|
|
|
|
| 247 |
for segment in segments:
|
| 248 |
-
|
| 249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
|
| 251 |
def format_time(seconds):
|
| 252 |
hours = math.floor(seconds / 3600)
|
|
@@ -262,107 +107,32 @@ def generate_subtitle_file(language, segments, input_video_name):
|
|
| 262 |
subtitle_file = f"sub-{input_video_name}.{language}.srt"
|
| 263 |
text = ""
|
| 264 |
for index, segment in enumerate(segments):
|
| 265 |
-
segment_start = format_time(segment
|
| 266 |
-
segment_end = format_time(segment
|
| 267 |
text += f"{str(index+1)} \n"
|
| 268 |
text += f"{segment_start} --> {segment_end} \n"
|
| 269 |
-
text += f"{segment
|
| 270 |
text += "\n"
|
| 271 |
f = open(subtitle_file, "w", encoding='utf8')
|
| 272 |
f.write(text)
|
| 273 |
f.close()
|
| 274 |
return subtitle_file
|
| 275 |
|
| 276 |
-
def read_srt_file(file_path):
    """Return the UTF-8 text content of *file_path*.

    Returns None when the file is missing or cannot be read; the reason is
    printed instead of raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        print(f"The file {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    return None
|
| 285 |
-
|
| 286 |
-
def write_srt(subtitle_text, output_file="edited_srt.srt"):
    """Write *subtitle_text* to an ``.srt`` file and return the path written.

    ``.srt`` is appended only when *output_file* does not already end with
    it, so the default writes ``edited_srt.srt`` rather than
    ``edited_srt.srt.srt``.
    """
    if output_file.lower().endswith(".srt"):
        path = output_file
    else:
        path = output_file + ".srt"
    with open(path, 'w', encoding="utf-8") as file:
        file.write(subtitle_text)
    return path
|
| 290 |
-
|
| 291 |
-
def generate_translated_subtitle(language, segments, input_video_name):
    """Re-emit translated SRT text with right-to-left marks on the text lines.

    *segments* is SRT text whose non-blank lines are read in groups of three:
    cue number, timing line, subtitle text. Each text line is prefixed with
    U+200F (RIGHT-TO-LEFT MARK) and followed by a blank line. The result is
    written to ``<basename of input_video_name>.srt`` in the working directory.

    Args:
        language: accepted for interface compatibility; not used.
        segments: the SRT text to re-format.
        input_video_name: name (optionally with a ``/`` path) used to derive
            the output file name.

    Returns:
        The name of the subtitle file written.
    """
    input_video_name = input_video_name.split('/')[-1]
    subtitle_file = f"{input_video_name}.srt"

    # Drop blank AND whitespace-only lines: a stray " " line would otherwise
    # count as content and shift the 3-line cadence for every following cue.
    # NOTE: cues whose text spans several lines still break the cadence.
    content_lines = [item for item in segments.split('\n') if item.strip()]

    pieces = []
    for position, segment in enumerate(content_lines, start=1):
        if position % 3 == 0:
            # Third line of each group is the subtitle text.
            pieces.append(f"\u200F{segment}\n\n")
        else:
            # Cue number or timing line: copied through unchanged.
            pieces.append(f"{segment}\n")

    with open(subtitle_file, "w", encoding='utf8') as f:
        f.write("".join(pieces))
    return subtitle_file
|
| 311 |
-
|
| 312 |
def clean_text(text):
    """Strip Markdown code-fence residue from a model-produced SRT string.

    Removes a triple-backtick fence at the very start and at the very end of
    *text*, then removes a leading ``srt`` from every line (the language tag
    left behind by an opening "```srt" fence).
    """
    # No MULTILINE here: only the outermost fences are removed.
    text = re.sub(r"^```|```$", '', text)
    # NOTE: with MULTILINE this also trims "srt" from any subtitle line that
    # happens to start with those letters.
    text = re.sub(r'^srt', '', text, flags=re.MULTILINE)
    return text
|
| 318 |
-
|
| 319 |
-
def split_srt_file(input_file, max_chars=3000):
    """Split an SRT file into ``split_<n>.srt`` files of at most *max_chars*.

    Cues (blocks separated by a blank line) are never cut: a cue longer than
    *max_chars* is written to a file of its own. Output files are created in
    the working directory, numbered from 1.

    Args:
        input_file: path of the UTF-8 SRT file to split.
        max_chars: size budget per output file, counting the blank line
            after each cue.

    Returns:
        The list of file names written, in order; empty for an empty input.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    if not content.strip():
        # Nothing to split; do not create an empty split_1.srt.
        return []

    # Split the content into individual subtitles
    subtitles = content.strip().split('\n\n')

    output_files = []

    def flush(chunk):
        # Write one finished chunk to the next numbered split file.
        output_file_name = f'split_{len(output_files) + 1}.srt'
        with open(output_file_name, 'w', encoding='utf-8') as output_file:
            output_file.write(chunk.strip())
        output_files.append(output_file_name)

    current_file_content = ''
    for subtitle in subtitles:
        # +2 accounts for the "\n\n" separator appended after each cue.
        would_overflow = len(current_file_content) + len(subtitle) + 2 > max_chars
        # Only flush when there is something to flush; otherwise an oversized
        # first cue used to produce an empty split file.
        if would_overflow and current_file_content:
            flush(current_file_content)
            current_file_content = ''
        current_file_content += subtitle + '\n\n'

    if current_file_content:
        flush(current_file_content)

    return output_files
|
| 356 |
-
|
| 357 |
-
def translate_text(api_key, source_lang, target_lang, text):
|
| 358 |
url = "https://api.one-api.ir/translate/v1/google/"
|
| 359 |
request_body = {"source": source_lang, "target": target_lang, "text": text}
|
| 360 |
headers = {"one-api-token": api_key, "Content-Type": "application/json"}
|
| 361 |
response = requests.post(url, headers=headers, json=request_body)
|
| 362 |
if response.status_code == 200:
|
| 363 |
result = response.json()
|
| 364 |
-
|
| 365 |
-
return enhanced_text
|
| 366 |
else:
|
| 367 |
print(f"Error: {response.status_code}, {response.text}")
|
| 368 |
return None
|
|
@@ -411,103 +181,73 @@ def enhance_text(api_key, text):
|
|
| 411 |
attempts += 1
|
| 412 |
time.sleep(30)
|
| 413 |
print("Error Max attempts reached. Could not retrieve a successful response.")
|
| 414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
|
| 416 |
def write_google(google_translate):
|
| 417 |
google = "google_translate.srt"
|
| 418 |
-
with open(google, '
|
| 419 |
f.write(google_translate)
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
for
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
return
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
#input_video, input_audio = one_youtube(url, api_key)
|
| 458 |
-
input_video, title = yt_download(url)
|
| 459 |
-
input_video_name = input_video.replace(".mp4", "")
|
| 460 |
-
input_audio = extract_audio(input_video)
|
| 461 |
-
segments = transcribe(audio=input_audio)
|
| 462 |
-
language = "fa"
|
| 463 |
-
subtitle_file = generate_subtitle_file(language=language, segments=segments, input_video_name=input_video_name)
|
| 464 |
-
source_language = "en"
|
| 465 |
-
target_language = "fa"
|
| 466 |
-
#srt_string = read_srt_file(subtitle_file)
|
| 467 |
-
srt_files=split_srt_file(subtitle_file)
|
| 468 |
-
for i in srt_files:
|
| 469 |
-
srt_string = read_srt_file(f"{i}")
|
| 470 |
-
#google_translate = translate_text(api_key, source_language, target_language, srt_string)
|
| 471 |
-
google_translate = enhance_text(api_key, srt_string)
|
| 472 |
-
if google_translate == 0 :
|
| 473 |
-
google_translate = translate_text(api_key, source_language, target_language, srt_string)
|
| 474 |
-
write_google(google_translate)
|
| 475 |
-
time.sleep(15)
|
| 476 |
-
srt = read_srt_file("google_translate.srt")
|
| 477 |
-
os.remove("google_translate.srt")
|
| 478 |
-
return srt, video, input_audio
|
| 479 |
-
|
| 480 |
-
def video_edit(srt, input_video, input_audio= 'audio.mp3'):
    """Write the translated subtitle file for *input_video*.

    Args:
        srt: translated SRT text.
        input_video: path of the video; ``.mp4`` is removed to derive the
            subtitle file's base name.
        input_audio: accepted for interface compatibility; not used.

    Returns:
        ``(input_video, srt_name)`` -- the unchanged video path and the name
        of the subtitle file written by generate_translated_subtitle.
    """
    # The moviepy step that burned the subtitles into the video sat after the
    # return as an unreachable string literal; it has been dropped.
    input_video_name = input_video.replace(".mp4", "")
    srt_name = generate_translated_subtitle("fa", srt, input_video_name)
    return input_video, srt_name
|
| 497 |
-
|
| 498 |
-
|
| 499 |
with gr.Blocks() as demo:
|
| 500 |
gr.Markdown("Start typing below and then click **Run** to see the output.")
|
| 501 |
-
with gr.
|
| 502 |
-
inp = gr.Textbox(placeholder="Enter URL or upload")
|
| 503 |
-
drp = gr.Dropdown(["insta", "youtube"])
|
| 504 |
-
btn = gr.Button("transcribe")
|
| 505 |
-
out = gr.Textbox(interactive=True)
|
| 506 |
video_file_input = gr.Video(label="Upload Video File")
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
|
|
|
| 511 |
vid_out = gr.Video()
|
| 512 |
srt_file = gr.File()
|
| 513 |
btn2 = gr.Button("transcribe")
|
|
@@ -515,7 +255,7 @@ with gr.Blocks() as demo:
|
|
| 515 |
triggers=[btn2.click],
|
| 516 |
fn=write_google,
|
| 517 |
inputs=out,
|
| 518 |
-
).then(video_edit, [out, video_path_output, audio_path_output], outputs=[vid_out, srt_file])
|
| 519 |
|
| 520 |
|
| 521 |
demo.launch(debug=True)
|
|
|
|
| 9 |
import json
|
| 10 |
import arabic_reshaper # pip install arabic-reshaper
|
| 11 |
from bidi.algorithm import get_display # pip install python-bidi
|
| 12 |
+
from moviepy import *
|
| 13 |
import pysrt
|
|
|
|
| 14 |
import time
|
| 15 |
import re
|
| 16 |
import concurrent.futures
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def extract_audio(input_video_name):
|
| 23 |
# Define the input video file and output audio file
|
| 24 |
mp3_file = "audio.mp3"
|
|
|
|
| 39 |
print("Audio extraction successful!")
|
| 40 |
return mp3_file
|
| 41 |
|
| 42 |
+
def transcribe(audio, max_segment_duration=2.0):
    """Transcribe *audio* with the "tiny" Whisper model and return word timings.

    Args:
        audio: the audio input handed to ``WhisperModel.transcribe``.
        max_segment_duration: accepted for interface compatibility; not used
            by this function.

    Returns:
        A list of ``{'word': str, 'start': float, 'end': float}`` dicts, one
        per recognised word, in transcript order.
    """
    # local_files_only=True: the model must already be present locally.
    model = WhisperModel("tiny", device="cpu", cpu_threads=12, local_files_only=True)
    segments, info = model.transcribe(
        audio,
        vad_filter=True,
        vad_parameters=dict(min_silence_duration_ms=1500),
        word_timestamps=True,
        log_progress=True,
    )

    wordlevel_info = []
    # Per the original comment, the transcription only runs once the
    # segments are consumed, i.e. during this loop.
    for segment in segments:
        # Guard against a segment that carries no word list.
        for word in segment.words or ():
            print("[%.2fs -> %.2fs] %s" % (word.start, word.end, word.word))
            wordlevel_info.append({'word': word.word, 'start': word.start, 'end': word.end})
    return wordlevel_info
|
| 52 |
+
|
| 53 |
+
def create_subtitles(wordlevel_info, max_words=5):
    """Group word-level timings into short subtitle cues.

    A cue is closed when a word ends with a punctuation mark or when it holds
    *max_words* words. Afterwards every cue's end time is moved to the start
    of the following cue so there are no gaps between consecutive cues.

    Args:
        wordlevel_info: iterable of ``{'word', 'start', 'end'}`` dicts in
            transcript order.
        max_words: maximum number of words per cue (default 5).

    Returns:
        A list of dicts with keys ``word`` (the cue text), ``start``, ``end``
        and ``textcontents`` (the word dicts that make up the cue).
    """
    punctuation_marks = {'.', '!', '?', ',', ';', ':', '—', '-', '。'}

    def build_cue(words):
        # Words may carry surrounding whitespace (e.g. " Hello"); strip it
        # so the joined text has single spaces and no leading blank.
        tokens = (item["word"].strip() for item in words)
        return {
            "word": " ".join(token for token in tokens if token),
            "start": words[0]["start"],
            "end": words[-1]["end"],
            "textcontents": list(words),
        }

    subtitles = []
    line = []
    for word_data in wordlevel_info:
        line.append(word_data)
        token = word_data['word'].strip()
        ends_with_punct = bool(token) and token[-1] in punctuation_marks
        if ends_with_punct or len(line) >= max_words:
            subtitles.append(build_cue(line))
            line = []

    # Add remaining words if any
    if line:
        subtitles.append(build_cue(line))

    # Close gaps: each cue stays on screen until the next one starts.
    for previous, following in zip(subtitles, subtitles[1:]):
        previous["end"] = following["start"]

    return subtitles
|
| 95 |
|
| 96 |
def format_time(seconds):
|
| 97 |
hours = math.floor(seconds / 3600)
|
|
|
|
| 107 |
subtitle_file = f"sub-{input_video_name}.{language}.srt"
|
| 108 |
text = ""
|
| 109 |
for index, segment in enumerate(segments):
|
| 110 |
+
segment_start = format_time(segment['start'])
|
| 111 |
+
segment_end = format_time(segment['end'])
|
| 112 |
text += f"{str(index+1)} \n"
|
| 113 |
text += f"{segment_start} --> {segment_end} \n"
|
| 114 |
+
text += f"{segment['word']} \n"
|
| 115 |
text += "\n"
|
| 116 |
f = open(subtitle_file, "w", encoding='utf8')
|
| 117 |
f.write(text)
|
| 118 |
f.close()
|
| 119 |
return subtitle_file
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
def clean_text(text):
    """Strip Markdown code-fence residue from a model-produced SRT string.

    Removes a triple-backtick fence at the very start and at the very end of
    *text*, then removes a leading ``srt`` from every line (the language tag
    left behind by an opening "```srt" fence).
    """
    # No MULTILINE here: only the outermost fences are removed.
    text = re.sub(r"^```|```$", '', text)
    # NOTE: with MULTILINE this also trims "srt" from any subtitle line that
    # happens to start with those letters.
    text = re.sub(r'^srt', '', text, flags=re.MULTILINE)
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
+
def translate_text(api_key, text, source_language = "en", target_language = "fa"):
    """Translate *text* through the one-api.ir Google-translate endpoint.

    Args:
        api_key: value sent in the ``one-api-token`` header.
        text: the text to translate.
        source_language: language code of *text* (default ``"en"``).
        target_language: language code to translate into (default ``"fa"``).

    Returns:
        The ``result`` field of the JSON response, or None when the request
        fails or the server does not answer 200 (the error is printed).
    """
    url = "https://api.one-api.ir/translate/v1/google/"
    # The body must use this function's own parameter names; the previous
    # source_lang / target_lang names no longer exist and raised NameError.
    request_body = {"source": source_language, "target": target_language, "text": text}
    headers = {"one-api-token": api_key, "Content-Type": "application/json"}
    try:
        response = requests.post(url, headers=headers, json=request_body, timeout=60)
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None

    if response.status_code == 200:
        result = response.json()
        return result['result']
    print(f"Error: {response.status_code}, {response.text}")
    return None
|
|
|
|
| 181 |
attempts += 1
|
| 182 |
time.sleep(30)
|
| 183 |
print("Error Max attempts reached. Could not retrieve a successful response.")
|
| 184 |
+
te = translate_text(api_key, text)
|
| 185 |
+
return te
|
| 186 |
+
|
| 187 |
+
def read_srt_file(file_path):
    """Return the UTF-8 text content of *file_path*.

    Returns None when the file is missing or cannot be read; the reason is
    printed instead of raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        print(f"The file {file_path} was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
    return None
|
| 196 |
+
|
| 197 |
+
def write_srt(subtitle_text, output_file="edited_srt.srt"):
    """Write *subtitle_text* to *output_file* as UTF-8 and return that path.

    Returning the path mirrors write_google, so callers can chain the result
    into the next step.
    """
    with open(output_file, 'w', encoding="utf-8") as file:
        file.write(subtitle_text)
    return output_file
|
| 200 |
|
| 201 |
def write_google(google_translate):
    """Write translated SRT text to ``google_translate.srt`` and return that name.

    The file is created in the working directory, UTF-8 encoded, and
    overwritten on every call.
    """
    google = "google_translate.srt"
    with open(google, 'w', encoding="utf-8") as f:
        f.write(google_translate)
    return google
|
| 206 |
+
|
| 207 |
+
def generate_translated_subtitle(language, segments, input_video_name):
    """Re-emit translated SRT text with right-to-left marks on the text lines.

    *segments* is SRT text whose non-blank lines are read in groups of three:
    cue number, timing line, subtitle text. Each text line is prefixed with
    U+200F (RIGHT-TO-LEFT MARK) and followed by a blank line. The result is
    written to ``<basename of input_video_name>.srt`` in the working directory.

    Args:
        language: accepted for interface compatibility; not used.
        segments: the SRT text to re-format.
        input_video_name: name (optionally with a ``/`` path) used to derive
            the output file name.

    Returns:
        The name of the subtitle file written.
    """
    input_video_name = input_video_name.split('/')[-1]
    subtitle_file = f"{input_video_name}.srt"

    # Drop blank AND whitespace-only lines: a stray " " line would otherwise
    # count as content and shift the 3-line cadence for every following cue.
    # NOTE: cues whose text spans several lines still break the cadence.
    content_lines = [item for item in segments.split('\n') if item.strip()]

    pieces = []
    for position, segment in enumerate(content_lines, start=1):
        if position % 3 == 0:
            # Third line of each group is the subtitle text.
            pieces.append(f"\u200F{segment}\n\n")
        else:
            # Cue number or timing line: copied through unchanged.
            pieces.append(f"{segment}\n")

    with open(subtitle_file, "w", encoding='utf8') as f:
        f.write("".join(pieces))
    return subtitle_file
|
| 227 |
+
|
| 228 |
+
def process_video(video, clip_type=None):
    """Run the full pipeline: audio -> transcript -> SRT -> translated SRT.

    Args:
        video: path of the uploaded video file.
        clip_type: optional clip mode chosen in the UI; currently not used.
            It defaults to None so the function can also be called with the
            video alone.

    Returns:
        The name of the translated subtitle file.

    Raises:
        RuntimeError: if the generated SRT cannot be read back or the
            translation step yields no text.
    """
    mp3_file = extract_audio(video)
    wordlevel_info = transcribe(mp3_file)
    subtitles = create_subtitles(wordlevel_info)
    subtitle_file = generate_subtitle_file('fa', subtitles, 'video_subtitled')

    srt_string = read_srt_file(subtitle_file)
    if srt_string is None:
        raise RuntimeError(f"Could not read generated subtitle file: {subtitle_file}")

    google_translate = enhance_text(api_key, srt_string)
    if not google_translate:
        # The translation helpers return None on failure; stop here with a
        # clear message instead of failing later on None.split().
        raise RuntimeError("Translation failed: no text was returned.")

    # Keeps a copy of the raw translation in google_translate.srt.
    write_google(google_translate)
    sub = generate_translated_subtitle("fa", google_translate, "video_subtitled")
    return sub
|
| 240 |
+
|
| 241 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
with gr.Blocks() as demo:
|
| 243 |
gr.Markdown("Start typing below and then click **Run** to see the output.")
|
| 244 |
+
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
video_file_input = gr.Video(label="Upload Video File")
|
| 246 |
+
clip_type = gr.Dropdown(["auto edit", "default"], label="Clip Type")
|
| 247 |
+
btn = gr.Button("create")
|
| 248 |
+
srt_file_output = gr.Text(label="result: ")
|
| 249 |
+
btn.click(fn=process_video, inputs=[video_file_input], outputs=srt_file_output)
|
| 250 |
+
""" with gr.Row():
|
| 251 |
vid_out = gr.Video()
|
| 252 |
srt_file = gr.File()
|
| 253 |
btn2 = gr.Button("transcribe")
|
|
|
|
| 255 |
triggers=[btn2.click],
|
| 256 |
fn=write_google,
|
| 257 |
inputs=out,
|
| 258 |
+
).then(video_edit, [out, video_path_output, audio_path_output], outputs=[vid_out, srt_file])"""
|
| 259 |
|
| 260 |
|
| 261 |
demo.launch(debug=True)
|