aliSaac510 commited on
Commit
276a4df
·
1 Parent(s): 0890748

fix arabic fonts

Browse files
core/analyze.py CHANGED
@@ -70,7 +70,7 @@ def analyze_transcript(transcript):
70
  for attempt in range(max_retries):
71
  try:
72
  response = client.chat.completions.create(
73
- model="deepseek/deepseek-chat",
74
  messages=[
75
  {"role": "system", "content": "You are a helpful assistant that outputs only valid JSON."},
76
  {"role": "user", "content": prompt}
 
70
  for attempt in range(max_retries):
71
  try:
72
  response = client.chat.completions.create(
73
+ model="nvidia/nemotron-3-nano-30b-a3b:free",
74
  messages=[
75
  {"role": "system", "content": "You are a helpful assistant that outputs only valid JSON."},
76
  {"role": "user", "content": prompt}
core/free_translator.py CHANGED
@@ -1,70 +1,35 @@
1
- import os
2
- import json
3
- import urllib.request
4
  import urllib.parse
 
5
 
6
class FreeTranslator:
    """Free translation backed by the MyMemory REST API, using urllib only (no httpx)."""

    def __init__(self):
        pass

    def _pick_highlights(self, translated):
        """Pick words worth emphasizing: known hype words first, else the two longest."""
        # Common exciting words (English + Arabic).
        exciting_words = [
            "amazing", "incredible", "awesome", "fantastic", "perfect", "best", "ultimate",
            "رائع", "مذهل", "أفضل", "مثالي", "خرافي", "لا يصدق", "عجيب"
        ]
        tokens = translated.split()
        picked = [tok for tok in tokens if tok.lower().strip(".,!?") in exciting_words]
        # No hype words found: fall back to the two longest tokens.
        if not picked and len(tokens) >= 2:
            picked = sorted(tokens, key=len, reverse=True)[:2]
        return picked

    def translate_text(self, text, target_language_code, source_language_code="en"):
        """Translate *text* for free via MyMemory.

        Returns a ``(translated_text, highlight_words)`` tuple; on any failure
        the original text and an empty list are returned instead.
        """
        # Blank input: nothing to translate.
        if not text.strip():
            return "", []

        # Same source and target language: return the text untouched.
        if source_language_code.lower() == target_language_code.lower():
            return text, []

        try:
            # urllib instead of requests, to avoid the httpx dependency issue.
            query = urllib.parse.urlencode({
                'q': text,
                'langpair': f'{source_language_code.lower()}|{target_language_code.lower()}'
            })
            request = urllib.request.Request(
                "https://api.mymemory.translated.net/get" + '?' + query
            )
            request.add_header('User-Agent', 'Mozilla/5.0')

            with urllib.request.urlopen(request, timeout=10) as response:
                if response.status == 200:
                    payload = json.loads(response.read().decode())
                    if payload.get('responseStatus') == 200:
                        translated = payload['responseData']['translatedText']
                        return translated, self._pick_highlights(translated)

            # Fallback: return the original text.
            return text, []
        except Exception as e:
            print(f"⚠️ Error in free translation: {e}")
            return text, []
 
1
+ import requests
 
 
2
  import urllib.parse
3
+ from deep_translator import GoogleTranslator
4
 
5
class FreeTranslator:
    """Free text translation via deep-translator's GoogleTranslator (no API key)."""

    def __init__(self):
        # deep-translator needs no base_url or API key, so nothing to configure.
        pass

    def translate_text(self, text, target_language_code, source_language_code="en"):
        """
        Translates text using deep-translator (Google Translate Web Interface).
        Free, no API key required, and better context handling.

        Args:
            text: Source text; returned unchanged when empty/whitespace.
            target_language_code: Language code to translate into.
            source_language_code: Language code of the input; falsy → auto-detect.

        Returns:
            (translated_text, error): *error* is None on success, otherwise a
            short message and *text* is returned unchanged.
        """
        if not text or not text.strip():
            return text, "No text provided"

        # If source and target are the same, return the original.
        # Compare case-insensitively: "EN" vs "en" must still be a no-op
        # (the previous MyMemory implementation lowercased both codes too).
        if source_language_code and \
                source_language_code.lower() == target_language_code.lower():
            return text, None

        try:
            # GoogleTranslator auto-detects the source when given 'auto';
            # we pass the caller's code when available for accuracy.
            src = source_language_code if source_language_code else 'auto'

            translator = GoogleTranslator(source=src, target=target_language_code)
            translated = translator.translate(text)

            return translated, None

        except Exception as e:
            # Best-effort fallback: hand back the original text plus the error.
            return text, f"Translation error: {str(e)}"
core/stt.py CHANGED
@@ -308,7 +308,7 @@ class STT:
308
 
309
  segments_iter, info = self.model.transcribe(
310
  video_path,
311
- beam_size=5,
312
  word_timestamps=True, # Always needed for standards & highlight_word
313
  language=actual_stt_lang,
314
  vad_filter=vad_filter,
 
308
 
309
  segments_iter, info = self.model.transcribe(
310
  video_path,
311
+ beam_size=1,
312
  word_timestamps=True, # Always needed for standards & highlight_word
313
  language=actual_stt_lang,
314
  vad_filter=vad_filter,
core/subtitle_manager.py CHANGED
@@ -1,7 +1,6 @@
1
  """
2
  SubtitleManager — Viral YouTube Shorts Caption Engine
3
  Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
4
-
5
  ✅ Fixes & Improvements:
6
  - Shadow layers now actually rendered (was `pass` in original)
7
  - active_word_index (int) replaces unreliable id() comparison
@@ -288,19 +287,15 @@ def _is_arabic_script(text: str) -> bool:
288
  def _prepare_display_text(raw: str, is_rtl: bool, language: str = None) -> str:
289
  """
290
  Prepares text for correct rendering in Pillow (PIL.ImageDraw).
291
-
292
  Pipeline for Arabic/Persian/Urdu:
293
  1. ArabicReshaper.reshape() — converts Unicode isolated codepoints to
294
  contextual presentation forms + joins ligatures.
295
  This is MANDATORY for Pillow because FreeType does NOT do this.
296
  2. bidi.get_display() — reorders characters right-to-left.
297
-
298
  Pipeline for Hebrew:
299
  bidi.get_display() only — Hebrew has no contextual shaping requirement.
300
-
301
  Pipeline for Latin/Cyrillic:
302
  uppercase only.
303
-
304
  Pipeline for CJK/Thai/Devanagari:
305
  as-is (no uppercase, no bidi needed at the Pillow level).
306
  """
@@ -672,7 +667,6 @@ class SubtitleManager:
672
  ) -> list:
673
  """
674
  Generates all caption ImageClips ready for compositing.
675
-
676
  Arabic caption_style recommendations:
677
  "cairo_bold" → best for Egyptian/Gulf social media content
678
  "tajawal_bold" → modern geometric, dark background, great readability
@@ -873,7 +867,7 @@ class SubtitleManager:
873
  caption_mode: str = "sentence",
874
  caption_style: str = "classic",
875
  ):
876
- clips = SubtitleManager.create_caption_clips(
877
  transcript_data,
878
  size = size,
879
  language = language,
 
1
  """
2
  SubtitleManager — Viral YouTube Shorts Caption Engine
3
  Styles tuned for 2024-2025 Shorts/Reels/TikTok viral aesthetics.
 
4
  ✅ Fixes & Improvements:
5
  - Shadow layers now actually rendered (was `pass` in original)
6
  - active_word_index (int) replaces unreliable id() comparison
 
287
  def _prepare_display_text(raw: str, is_rtl: bool, language: str = None) -> str:
288
  """
289
  Prepares text for correct rendering in Pillow (PIL.ImageDraw).
 
290
  Pipeline for Arabic/Persian/Urdu:
291
  1. ArabicReshaper.reshape() — converts Unicode isolated codepoints to
292
  contextual presentation forms + joins ligatures.
293
  This is MANDATORY for Pillow because FreeType does NOT do this.
294
  2. bidi.get_display() — reorders characters right-to-left.
 
295
  Pipeline for Hebrew:
296
  bidi.get_display() only — Hebrew has no contextual shaping requirement.
 
297
  Pipeline for Latin/Cyrillic:
298
  uppercase only.
 
299
  Pipeline for CJK/Thai/Devanagari:
300
  as-is (no uppercase, no bidi needed at the Pillow level).
301
  """
 
667
  ) -> list:
668
  """
669
  Generates all caption ImageClips ready for compositing.
 
670
  Arabic caption_style recommendations:
671
  "cairo_bold" → best for Egyptian/Gulf social media content
672
  "tajawal_bold" → modern geometric, dark background, great readability
 
867
  caption_mode: str = "sentence",
868
  caption_style: str = "classic",
869
  ):
870
+ clips = SubtitleManager.create_caption_clips(
871
  transcript_data,
872
  size = size,
873
  language = language,
fix_fonts.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from test_font import test_arabic_render


def cleanup_fonts():
    """Delete cached font files so the next font test re-downloads fresh copies.

    Any of these files may be corrupt (e.g. an HTML error page saved in place
    of the real TTF), so every one that exists is removed unconditionally —
    the size is only printed for diagnostics.
    """
    # List of potentially corrupt font files.
    fonts_to_check = [
        "Tajawal-Bold.ttf",
        "Cairo-Bold.ttf",
        "NotoSansArabic-Bold.ttf",
        "Rubik-Bold.ttf",
        "Montserrat-Bold.ttf",
    ]

    print("🧹 Cleaning up potentially corrupt font files...")
    for font in fonts_to_check:
        if os.path.exists(font):
            try:
                size = os.path.getsize(font)
                # Always delete: a tiny file is likely an HTML error page, and
                # even a full-size one may be stale — force a re-download.
                print(f" found {font} ({size} bytes) -> DELETING to force re-download.")
                os.remove(font)
            except Exception as e:
                print(f" ❌ Could not delete {font}: {e}")
        else:
            print(f" {font} not found (good).")

    print("\n🔄 Re-running font test (this will trigger new downloads)...")
    test_arabic_render()


if __name__ == "__main__":
    cleanup_fonts()
requirements.txt CHANGED
@@ -16,4 +16,5 @@ openai>=1.0.0
16
  scipy
17
  json_repair
18
  tiktoken
19
- pydantic
 
 
16
  scipy
17
  json_repair
18
  tiktoken
19
+ pydantic
20
+ deep-translator
test_arabic.png ADDED