Spaces:

mohammed777
/

youtube-analyzer-api

Sleeping

App Files Community

mohammed777 committed on Jun 22, 2025

Commit

6b2fceb

verified ·

1 Parent(s): f9837bb

Update main.py

Browse files

Files changed (1) hide show

main.py +10 -24

main.py CHANGED Viewed

@@ -19,17 +19,14 @@ import random
 import io
 import base64
 import math
-import time # تم إضافة هذه المكتبة للتحكم في التأخير
 # --- إعدادات NLTK و Logging ---
-# ضبط مسار بيانات NLTK ليطابق Dockerfile
 nltk.data.path.append('/app/nltk_data')
 try:
-    # سيتم تنزيلها مرة واحدة في Dockerfile، ولكن هذا يضمن أنها متاحة
     nltk.download('stopwords', quiet=True)
 except Exception as e:
     logging.error(f"Failed to download NLTK stopwords: {e}")
-    # التعامل مع الخطأ إذا لم يتمكن من التنزيل (مثلاً إذا لم يكن المسار صحيحاً أو صلاحيات)
 arabic_stopwords = set(stopwords.words('arabic'))
@@ -64,7 +61,7 @@ except FileNotFoundError:
 except Exception as e:
     logging.error(f"خطأ غير متوقع أثناء تحميل نموذج تحليل المشاعر: {e}")
-# --- إعدادات User-Agent لـ yt-dlp (جديد) ---
 ua = UserAgent()
 def get_desktop_user_agent():
@@ -78,8 +75,7 @@ selected_user_agent = get_desktop_user_agent()
 headers = {'User-Agent': selected_user_agent}
 # تحديد مسار ملف الكوكيز داخل حاوية Docker
-# تأكد من أن هذا المسار يتطابق مع المكان الذي قمت برفع cookies.txt إليه في Hugging Face Space
-COOKIES_FILE_PATH = 'cookies.txt' # افتراض أن cookies.txt في الجذر /app/
 ydl_opts_video_info = {
     'quiet': True,
@@ -90,7 +86,7 @@ ydl_opts_video_info = {
     'age_limit': 18,
     'force_generic_extractor': False,
     'http_headers': headers,
-    'cookiefile': COOKIES_FILE_PATH # إضافة خيار ملف الكوكيز لـ yt-dlp
 }
 # --- النموذج ---
@@ -121,8 +117,8 @@ def preprocess_text(text):
 # --- معالجة فيديو واحد فقط (نسخة 2) ---
 def process_single_video2(video_url, loaded_quality_model, loaded_sentiment_pipeline, max_comments_per_video=50):
-    # تمرير مسار ملف الكوكيز إلى YoutubeCommentDownloader
-    downloader = YoutubeCommentDownloader(cookies_file=COOKIES_FILE_PATH)
     video_id = extract_video_id(video_url)
     if not video_id:
@@ -130,13 +126,11 @@ def process_single_video2(video_url, loaded_quality_model, loaded_sentiment_pipe
         return None
     try:
-        # إضافة تأخير عشوائي قبل كل طلب معلومات فيديو
-        time.sleep(random.uniform(1, 3)) # تأخير بين 1 و 3 ثوانٍ
         with yt_dlp.YoutubeDL(ydl_opts_video_info) as ydl:
             info_dict = ydl.extract_info(video_url, download=False)
-            # --- تسجيل البيانات المستخلصة ---
             logging.info(f"[فيديو: {video_url}] تم استخراج البيانات: {info_dict.keys()}")
             if not info_dict or info_dict.get('is_live', False) or info_dict.get('age_limit', 0) > 0:
@@ -146,16 +140,13 @@ def process_single_video2(video_url, loaded_quality_model, loaded_sentiment_pipe
             views = info_dict.get('view_count', 0)
             likes = info_dict.get('like_count', 0)
-            # --- تسجيل المشاهدات والإعجابات ---
             logging.info(f"[فيديو: {video_url}] المشاهدات: {views}, الإعجابات: {likes}")
             upload_date = info_dict.get('upload_date', 'Unknown')
             publish_year = int(upload_date[:4]) if upload_date != 'Unknown' else datetime.now().year
-        # إضافة تأخير عشوائي قبل كل طلب جلب تعليقات
-        time.sleep(random.uniform(1, 3)) # تأخير بين 1 و 3 ثوانٍ
-        # --- جلب التعليقات ---
         sampled_comments = []
         try:
             for comment in downloader.get_comments_from_url(video_url):
@@ -171,7 +162,6 @@ def process_single_video2(video_url, loaded_quality_model, loaded_sentiment_pipe
         comment_view_ratio = len(sampled_comments) / views if views > 0 else 0.0
         engagement_score = like_view_ratio + comment_view_ratio
-        # --- تحليل التعليقات ---
         positive_comments = 0
         negative_comments = 0
         overall_sentiment = "لا توجد تعليقات كافية"
@@ -234,7 +224,8 @@ async def evaluate_youtube_playlist_individually_same_method2(youtube_url: str,
     video_links = []
     try:
-        with yt_dlp.YoutubeDL({'extract_flat': True, 'quiet': True, 'playlist_items': '1:10', 'cookiefile': COOKIES_FILE_PATH}) as ydl: # إضافة cookiefile هنا
             playlist_info = ydl.extract_info(youtube_url, download=False)
             if 'entries' in playlist_info:
                 for entry in playlist_info['entries'][:10]:
@@ -249,7 +240,6 @@ async def evaluate_youtube_playlist_individually_same_method2(youtube_url: str,
     individual_results = []
-    # --- معالجة الفيديوهات بالتوازي ---
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
         futures = [
             executor.submit(
@@ -265,10 +255,8 @@ async def evaluate_youtube_playlist_individually_same_method2(youtube_url: str,
             result = future.result()
             if result:
                 individual_results.append(result)
-            # تأخير إضافي بين معالجة نتائج الفيديوهات
             time.sleep(random.uniform(0.5, 1.5))
-    # --- الإحصاء النهائي ---
     num_good_videos = sum(1 for r in individual_results if r and r.get('quality') == 'جيد')
     total_positive_comments = sum(r.get('positive_comments', 0) for r in individual_results if r)
     total_negative_comments = sum(r.get('negative_comments', 0) for r in individual_results if r)
@@ -277,7 +265,6 @@ async def evaluate_youtube_playlist_individually_same_method2(youtube_url: str,
     total_videos = len(individual_results)
     percent_good_videos = (num_good_videos / total_videos) * 100 if total_videos > 0 else 0
-    # --- حساب التقييم العام ---
     if percent_good_videos >= 70:
         overall_quality = "جيد جداً"
     elif percent_good_videos >= 50:
@@ -285,7 +272,6 @@ async def evaluate_youtube_playlist_individually_same_method2(youtube_url: str,
     else:
         overall_quality = "سيء"
-    # --- النظام المركب ---
     WEIGHT_QUALITY = 0.6
     WEIGHT_SENTIMENT = 0.4

 import io
 import base64
 import math
+import time
 # --- إعدادات NLTK و Logging ---
 nltk.data.path.append('/app/nltk_data')
 try:
     nltk.download('stopwords', quiet=True)
 except Exception as e:
     logging.error(f"Failed to download NLTK stopwords: {e}")
 arabic_stopwords = set(stopwords.words('arabic'))
 except Exception as e:
     logging.error(f"خطأ غير متوقع أثناء تحميل نموذج تحليل المشاعر: {e}")
+# --- إعدادات User-Agent لـ yt-dlp ---
 ua = UserAgent()
 def get_desktop_user_agent():
 headers = {'User-Agent': selected_user_agent}
 # تحديد مسار ملف الكوكيز داخل حاوية Docker
+COOKIES_FILE_PATH = 'cookies.txt'
 ydl_opts_video_info = {
     'quiet': True,
     'age_limit': 18,
     'force_generic_extractor': False,
     'http_headers': headers,
+    'cookiefile': COOKIES_FILE_PATH # سيبقى هذا الخيار لـ yt-dlp
 }
 # --- النموذج ---
 # --- معالجة فيديو واحد فقط (نسخة 2) ---
 def process_single_video2(video_url, loaded_quality_model, loaded_sentiment_pipeline, max_comments_per_video=50):
+    # تم حذف cookies_file من هنا
+    downloader = YoutubeCommentDownloader()
     video_id = extract_video_id(video_url)
     if not video_id:
         return None
     try:
+        time.sleep(random.uniform(1, 3))
         with yt_dlp.YoutubeDL(ydl_opts_video_info) as ydl:
             info_dict = ydl.extract_info(video_url, download=False)
             logging.info(f"[فيديو: {video_url}] تم استخراج البيانات: {info_dict.keys()}")
             if not info_dict or info_dict.get('is_live', False) or info_dict.get('age_limit', 0) > 0:
             views = info_dict.get('view_count', 0)
             likes = info_dict.get('like_count', 0)
             logging.info(f"[فيديو: {video_url}] المشاهدات: {views}, الإعجابات: {likes}")
             upload_date = info_dict.get('upload_date', 'Unknown')
             publish_year = int(upload_date[:4]) if upload_date != 'Unknown' else datetime.now().year
+        time.sleep(random.uniform(1, 3))
         sampled_comments = []
         try:
             for comment in downloader.get_comments_from_url(video_url):
         comment_view_ratio = len(sampled_comments) / views if views > 0 else 0.0
         engagement_score = like_view_ratio + comment_view_ratio
         positive_comments = 0
         negative_comments = 0
         overall_sentiment = "لا توجد تعليقات كافية"
     video_links = []
     try:
+        # خيار cookiefile لـ yt-dlp يبقى هنا
+        with yt_dlp.YoutubeDL({'extract_flat': True, 'quiet': True, 'playlist_items': '1:10', 'cookiefile': COOKIES_FILE_PATH}) as ydl:
             playlist_info = ydl.extract_info(youtube_url, download=False)
             if 'entries' in playlist_info:
                 for entry in playlist_info['entries'][:10]:
     individual_results = []
     with ThreadPoolExecutor(max_workers=max_workers) as executor:
         futures = [
             executor.submit(
             result = future.result()
             if result:
                 individual_results.append(result)
             time.sleep(random.uniform(0.5, 1.5))
     num_good_videos = sum(1 for r in individual_results if r and r.get('quality') == 'جيد')
     total_positive_comments = sum(r.get('positive_comments', 0) for r in individual_results if r)
     total_negative_comments = sum(r.get('negative_comments', 0) for r in individual_results if r)
     total_videos = len(individual_results)
     percent_good_videos = (num_good_videos / total_videos) * 100 if total_videos > 0 else 0
     if percent_good_videos >= 70:
         overall_quality = "جيد جداً"
     elif percent_good_videos >= 50:
     else:
         overall_quality = "سيء"
     WEIGHT_QUALITY = 0.6
     WEIGHT_SENTIMENT = 0.4