Spaces:
Sleeping
Sleeping
Commit · 276a4df
1
Parent(s): 0890748
fix arabic fonts
Browse files
- core/analyze.py +1 -1
- core/free_translator.py +24 -59
- core/stt.py +1 -1
- core/subtitle_manager.py +1 -7
- fix_fonts.py +26 -0
- requirements.txt +2 -1
- test_arabic.png +0 -0
core/analyze.py
CHANGED
|
@@ -70,7 +70,7 @@ def analyze_transcript(transcript):
|
|
| 70 |
for attempt in range(max_retries):
|
| 71 |
try:
|
| 72 |
response = client.chat.completions.create(
|
| 73 |
-
model="
|
| 74 |
messages=[
|
| 75 |
{"role": "system", "content": "You are a helpful assistant that outputs only valid JSON."},
|
| 76 |
{"role": "user", "content": prompt}
|
|
|
|
| 70 |
for attempt in range(max_retries):
|
| 71 |
try:
|
| 72 |
response = client.chat.completions.create(
|
| 73 |
+
model="nvidia/nemotron-3-nano-30b-a3b:free",
|
| 74 |
messages=[
|
| 75 |
{"role": "system", "content": "You are a helpful assistant that outputs only valid JSON."},
|
| 76 |
{"role": "user", "content": prompt}
|
core/free_translator.py
CHANGED
|
@@ -1,70 +1,35 @@
|
|
| 1 |
-
import
|
| 2 |
-
import json
|
| 3 |
-
import urllib.request
|
| 4 |
import urllib.parse
|
|
|
|
| 5 |
|
| 6 |
class FreeTranslator:
|
| 7 |
def __init__(self):
|
|
|
|
| 8 |
pass
|
| 9 |
-
|
| 10 |
-
def translate_text(self, text, target_language_code, source_language_code="en"):
|
| 11 |
-
"""ترجمة مجانية باستخدام MyMemory API بدون httpx"""
|
| 12 |
-
if not text.strip():
|
| 13 |
-
return "", []
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
|
|
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
try:
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
'langpair': f'{source_lang}|{target_lang}'
|
| 28 |
-
}
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
# استخدام urllib.request بدلاً من requests
|
| 34 |
-
req = urllib.request.Request(full_url)
|
| 35 |
-
req.add_header('User-Agent', 'Mozilla/5.0')
|
| 36 |
-
|
| 37 |
-
with urllib.request.urlopen(req, timeout=10) as response:
|
| 38 |
-
if response.status == 200:
|
| 39 |
-
data = json.loads(response.read().decode())
|
| 40 |
-
if data.get('responseStatus') == 200:
|
| 41 |
-
translated_text = data['responseData']['translatedText']
|
| 42 |
-
|
| 43 |
-
# تحديد الكلمات المهمة
|
| 44 |
-
words = translated_text.split()
|
| 45 |
-
highlight_words = []
|
| 46 |
-
|
| 47 |
-
# كلمات حماسية شائعة
|
| 48 |
-
exciting_words = [
|
| 49 |
-
"amazing", "incredible", "awesome", "fantastic", "perfect", "best", "ultimate",
|
| 50 |
-
"رائع", "مذهل", "أفضل", "مثالي", "خرافي", "لا يصدق", "عجيب"
|
| 51 |
-
]
|
| 52 |
-
|
| 53 |
-
for word in words:
|
| 54 |
-
clean_word = word.lower().strip(".,!?")
|
| 55 |
-
if clean_word in exciting_words:
|
| 56 |
-
highlight_words.append(word)
|
| 57 |
-
|
| 58 |
-
# إذا مفيش كلمات حماسية، نختار أطول كلمتين
|
| 59 |
-
if not highlight_words and len(words) >= 2:
|
| 60 |
-
sorted_words = sorted(words, key=len, reverse=True)
|
| 61 |
-
highlight_words = sorted_words[:2]
|
| 62 |
-
|
| 63 |
-
return translated_text, highlight_words
|
| 64 |
-
|
| 65 |
-
# fallback: إرجاع النص الأصلي
|
| 66 |
-
return text, []
|
| 67 |
|
|
|
|
|
|
|
| 68 |
except Exception as e:
|
| 69 |
-
|
| 70 |
-
return text,
|
|
|
|
| 1 |
+
import requests
|
|
|
|
|
|
|
| 2 |
import urllib.parse
|
| 3 |
+
from deep_translator import GoogleTranslator
|
| 4 |
|
| 5 |
class FreeTranslator:
|
| 6 |
def __init__(self):
|
| 7 |
+
# deep-translator does not need base_url or API key
|
| 8 |
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
def translate_text(self, text, target_language_code, source_language_code="en"):
|
| 11 |
+
"""
|
| 12 |
+
Translates text using deep-translator (Google Translate Web Interface).
|
| 13 |
+
Free, no API key required, and better context handling.
|
| 14 |
+
"""
|
| 15 |
+
if not text or not text.strip():
|
| 16 |
+
return text, "No text provided"
|
| 17 |
|
| 18 |
+
# If source and target are same, return original
|
| 19 |
+
if source_language_code == target_language_code:
|
| 20 |
+
return text, None
|
| 21 |
+
|
| 22 |
try:
|
| 23 |
+
# deep-translator handles long text and context better
|
| 24 |
+
# Note: GoogleTranslator auto-detects source if not provided, but we pass it for accuracy.
|
| 25 |
+
# 'auto' is supported by deep-translator if source is unknown.
|
| 26 |
+
src = source_language_code if source_language_code else 'auto'
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
translator = GoogleTranslator(source=src, target=target_language_code)
|
| 29 |
+
translated = translator.translate(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
return translated, None
|
| 32 |
+
|
| 33 |
except Exception as e:
|
| 34 |
+
# Fallback or error reporting
|
| 35 |
+
return text, f"Translation error: {str(e)}"
|
core/stt.py
CHANGED
|
@@ -308,7 +308,7 @@ class STT:
|
|
| 308 |
|
| 309 |
segments_iter, info = self.model.transcribe(
|
| 310 |
video_path,
|
| 311 |
-
beam_size=
|
| 312 |
word_timestamps=True, # Always needed for standards & highlight_word
|
| 313 |
language=actual_stt_lang,
|
| 314 |
vad_filter=vad_filter,
|
|
|
|
| 308 |
|
| 309 |
segments_iter, info = self.model.transcribe(
|
| 310 |
video_path,
|
| 311 |
+
beam_size=1,
|
| 312 |
word_timestamps=True, # Always needed for standards & highlight_word
|
| 313 |
language=actual_stt_lang,
|
| 314 |
vad_filter=vad_filter,
|
core/subtitle_manager.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
SubtitleManager — Viral YouTube Shorts Caption Engine
|
| 3 |
Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
|
| 4 |
-
|
| 5 |
✅ Fixes & Improvements:
|
| 6 |
- Shadow layers now actually rendered (was `pass` in original)
|
| 7 |
- active_word_index (int) replaces unreliable id() comparison
|
|
@@ -288,19 +287,15 @@ def _is_arabic_script(text: str) -> bool:
|
|
| 288 |
def _prepare_display_text(raw: str, is_rtl: bool, language: str = None) -> str:
|
| 289 |
"""
|
| 290 |
Prepares text for correct rendering in Pillow (PIL.ImageDraw).
|
| 291 |
-
|
| 292 |
Pipeline for Arabic/Persian/Urdu:
|
| 293 |
1. ArabicReshaper.reshape() — converts Unicode isolated codepoints to
|
| 294 |
contextual presentation forms + joins ligatures.
|
| 295 |
This is MANDATORY for Pillow because FreeType does NOT do this.
|
| 296 |
2. bidi.get_display() — reorders characters right-to-left.
|
| 297 |
-
|
| 298 |
Pipeline for Hebrew:
|
| 299 |
bidi.get_display() only — Hebrew has no contextual shaping requirement.
|
| 300 |
-
|
| 301 |
Pipeline for Latin/Cyrillic:
|
| 302 |
uppercase only.
|
| 303 |
-
|
| 304 |
Pipeline for CJK/Thai/Devanagari:
|
| 305 |
as-is (no uppercase, no bidi needed at the Pillow level).
|
| 306 |
"""
|
|
@@ -672,7 +667,6 @@ class SubtitleManager:
|
|
| 672 |
) -> list:
|
| 673 |
"""
|
| 674 |
Generates all caption ImageClips ready for compositing.
|
| 675 |
-
|
| 676 |
Arabic caption_style recommendations:
|
| 677 |
"cairo_bold" → best for Egyptian/Gulf social media content
|
| 678 |
"tajawal_bold" → modern geometric, dark background, great readability
|
|
@@ -873,7 +867,7 @@ class SubtitleManager:
|
|
| 873 |
caption_mode: str = "sentence",
|
| 874 |
caption_style: str = "classic",
|
| 875 |
):
|
| 876 |
-
clips = SubtitleManager.create_caption_clips(
|
| 877 |
transcript_data,
|
| 878 |
size = size,
|
| 879 |
language = language,
|
|
|
|
| 1 |
"""
|
| 2 |
SubtitleManager — Viral YouTube Shorts Caption Engine
|
| 3 |
Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
|
|
|
|
| 4 |
✅ Fixes & Improvements:
|
| 5 |
- Shadow layers now actually rendered (was `pass` in original)
|
| 6 |
- active_word_index (int) replaces unreliable id() comparison
|
|
|
|
| 287 |
def _prepare_display_text(raw: str, is_rtl: bool, language: str = None) -> str:
|
| 288 |
"""
|
| 289 |
Prepares text for correct rendering in Pillow (PIL.ImageDraw).
|
|
|
|
| 290 |
Pipeline for Arabic/Persian/Urdu:
|
| 291 |
1. ArabicReshaper.reshape() — converts Unicode isolated codepoints to
|
| 292 |
contextual presentation forms + joins ligatures.
|
| 293 |
This is MANDATORY for Pillow because FreeType does NOT do this.
|
| 294 |
2. bidi.get_display() — reorders characters right-to-left.
|
|
|
|
| 295 |
Pipeline for Hebrew:
|
| 296 |
bidi.get_display() only — Hebrew has no contextual shaping requirement.
|
|
|
|
| 297 |
Pipeline for Latin/Cyrillic:
|
| 298 |
uppercase only.
|
|
|
|
| 299 |
Pipeline for CJK/Thai/Devanagari:
|
| 300 |
as-is (no uppercase, no bidi needed at the Pillow level).
|
| 301 |
"""
|
|
|
|
| 667 |
) -> list:
|
| 668 |
"""
|
| 669 |
Generates all caption ImageClips ready for compositing.
|
|
|
|
| 670 |
Arabic caption_style recommendations:
|
| 671 |
"cairo_bold" → best for Egyptian/Gulf social media content
|
| 672 |
"tajawal_bold" → modern geometric, dark background, great readability
|
|
|
|
| 867 |
caption_mode: str = "sentence",
|
| 868 |
caption_style: str = "classic",
|
| 869 |
):
|
| 870 |
+
clips = SubtitleManager.create_caption_clips(
|
| 871 |
transcript_data,
|
| 872 |
size = size,
|
| 873 |
language = language,
|
fix_fonts.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import glob
|
| 3 |
+
from test_font import test_arabic_render
|
| 4 |
+
|
| 5 |
+
def cleanup_fonts():
|
| 6 |
+
# List of potentially corrupt font files
|
| 7 |
+
fonts_to_check = ["Tajawal-Bold.ttf", "Cairo-Bold.ttf", "NotoSansArabic-Bold.ttf", "Rubik-Bold.ttf", "Montserrat-Bold.ttf"]
|
| 8 |
+
|
| 9 |
+
print("🧹 Cleaning up potentially corrupt font files...")
|
| 10 |
+
for font in fonts_to_check:
|
| 11 |
+
if os.path.exists(font):
|
| 12 |
+
try:
|
| 13 |
+
size = os.path.getsize(font)
|
| 14 |
+
# Delete if small (likely HTML error page) or just force refresh
|
| 15 |
+
print(f" found {font} ({size} bytes) -> DELETING to force re-download.")
|
| 16 |
+
os.remove(font)
|
| 17 |
+
except Exception as e:
|
| 18 |
+
print(f" ❌ Could not delete {font}: {e}")
|
| 19 |
+
else:
|
| 20 |
+
print(f" {font} not found (good).")
|
| 21 |
+
|
| 22 |
+
print("\n🔄 Re-running font test (this will trigger new downloads)...")
|
| 23 |
+
test_arabic_render()
|
| 24 |
+
|
| 25 |
+
if __name__ == "__main__":
|
| 26 |
+
cleanup_fonts()
|
requirements.txt
CHANGED
|
@@ -16,4 +16,5 @@ openai>=1.0.0
|
|
| 16 |
scipy
|
| 17 |
json_repair
|
| 18 |
tiktoken
|
| 19 |
-
pydantic
|
|
|
|
|
|
| 16 |
scipy
|
| 17 |
json_repair
|
| 18 |
tiktoken
|
| 19 |
+
pydantic
|
| 20 |
+
deep-translator
|
test_arabic.png
ADDED
|