Spaces:

mohammed777
/

youtube-analyzer-api

Sleeping

App Files Files Community

mohammed777 committed on Jun 22, 2025

Commit

5ea3020

verified ·

1 Parent(s): d7678ca

Upload 3 files

Browse files

Files changed (3) hide show

app.py +111 -0
main.py +302 -0
requirements.txt +16 -0

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import gradio as gr
+import requests
+import json
+# URL of the FastAPI endpoint that this UI calls.
+# It must be replaced with the actual URL of your API on Hugging Face.
+# Example: "https://<your-space-name>.hf.space" if it is the main Space,
+# or "https://<your-space-name>.hf.space/evaluate_youtube_playlist_individually_same_method2/"
+# if you are using a sub-path. Make sure the path is correct.
+FASTAPI_API_URL = "http://localhost:8000/evaluate_youtube_playlist_individually_same_method2/"
+# If your FastAPI application runs on Hugging Face, replace localhost with the Hugging Face Space URL.
+# Example: "https://YOUR_HF_SPACE_NAME.hf.space/evaluate_youtube_playlist_individually_same_method2/"
+# Remember that Hugging Face may give you only the base (root) URL, so the endpoint path has to be appended.
+def evaluate_playlist_with_fastapi(youtube_url: str, max_comments_per_video: int = 50, max_workers: int = 3):
+    """
+    تستدعي نقطة النهاية FastAPI API لتقييم قائمة تشغيل YouTube.
+    """
+    # تهيئة رسالة الخطأ الافتراضية
+    error_message = ""
+    # التحقق من أن الرابط ليس فارغًا
+    if not youtube_url:
+        error_message = "الرجاء إدخال رابط قائمة تشغيل أو فيديو يوتيوب."
+        gr.Warning(error_message) # عرض تحذير في واجهة Gradio
+        return error_message, error_message, error_message, error_message, error_message, error_message # إرجاع قيم فارغة أو رسالة خطأ لجميع المخرجات
+    payload = {
+        "youtube_url": youtube_url,
+        "max_comments_per_video": max_comments_per_video,
+        "max_workers": max_workers
+    }
+    headers = {"Content-Type": "application/json"}
+    try:
+        # إرسال طلب POST إلى API
+        response = requests.post(FASTAPI_API_URL, data=json.dumps(payload), headers=headers)
+        response.raise_for_status() # إثارة استثناء لأكواد حالة HTTP 4xx/5xx
+        result = response.json()
+        # التحقق من وجود مفتاح "error" في الاستجابة
+        if "error" in result:
+            error_message = f"خطأ من API: {result['error']}"
+            gr.Warning(error_message) # عرض تحذير في واجهة Gradio
+            return error_message, error_message, error_message, error_message, error_message, error_message
+        # استخراج النتائج المطلوبة
+        overall_quality = result.get("overall_quality", "غير متوفر")
+        composite_quality = result.get("composite_quality", "غير متوفر")
+        composite_score = result.get("composite_score", "غير متوفر")
+        percent_good_videos = result.get("percent_good_videos", "غير متوفر")
+        positive_ratio = result.get("positive_ratio", "غير متوفر")
+        negative_ratio = result.get("negative_ratio", "غير متوفر")
+        # إرجاع النتائج للعرض في Gradio
+        return (f"جودة عامة: {overall_quality}",
+                f"جودة مركبة: {composite_quality}",
+                f"النقاط المركبة: {composite_score}",
+                f"نسبة الفيديوهات الجيدة: {percent_good_videos}%",
+                f"نسبة التعليقات الإيجابية: {positive_ratio}%",
+                f"نسبة التعليقات السلبية: {negative_ratio}%")
+    except requests.exceptions.ConnectionError as e:
+        error_message = f"خطأ في الاتصال بـ API: {e}. تأكد من أن API يعمل والـ URL صحيح."
+        gr.Error(error_message) # عرض خطأ في واجهة Gradio
+    except requests.exceptions.Timeout as e:
+        error_message = f"انتهت مهلة طلب API: {e}. قد يكون API مشغولًا."
+        gr.Error(error_message)
+    except requests.exceptions.RequestException as e:
+        error_message = f"خطأ عام في الطلب: {e}. الاستجابة: {response.text if 'response' in locals() else 'لا توجد استجابة'}"
+        gr.Error(error_message)
+    except json.JSONDecodeError as e:
+        error_message = f"خطأ في تحليل استجابة JSON من API: {e}. الاستجابة الخام: {response.text if 'response' in locals() else 'لا توجد استجابة'}"
+        gr.Error(error_message)
+    except Exception as e:
+        error_message = f"حدث خطأ غير متوقع: {e}"
+        gr.Error(error_message)
+    # في حالة أي خطأ، أرجع رسالة الخطأ لجميع المخرجات
+    return error_message, error_message, error_message, error_message, error_message, error_message
+# Build the Gradio interface: one column of inputs and one column of six result textboxes.
+with gr.Blocks(title="تقييم قائمة تشغيل يوتيوب") as demo:
+    gr.Markdown("# تقييم جودة قائمة تشغيل يوتيوب")
+    gr.Markdown("هذه الواجهة تستخدم نموذج تحليل المشاعر والجودة لتقييم الفيديوهات.")
+    with gr.Row():
+        with gr.Column():
+            youtube_url_input = gr.Textbox(label="رابط قائمة تشغيل/فيديو يوتيوب", placeholder="الصق رابط يوتيوب هنا...")
+            max_comments_input = gr.Slider(minimum=10, maximum=200, value=50, step=10, label="الحد الأقصى للتعليقات لكل فيديو")
+            max_workers_input = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="الحد الأقصى للعمال المتوازيين")
+            submit_button = gr.Button("تقييم قائمة التشغيل")
+        with gr.Column():
+            overall_quality_output = gr.Textbox(label="جودة قائمة التشغيل الإجمالية")
+            composite_quality_output = gr.Textbox(label="الجودة المركبة")
+            composite_score_output = gr.Textbox(label="النقاط المركبة")
+            percent_good_videos_output = gr.Textbox(label="نسبة الفيديوهات الجيدة")
+            positive_ratio_output = gr.Textbox(label="نسبة التعليقات الإيجابية")
+            negative_ratio_output = gr.Textbox(label="نسبة التعليقات السلبية")
+    # Wire the button to the handler; the order of `outputs` must match the order
+    # of the six values returned by evaluate_playlist_with_fastapi.
+    submit_button.click(
+        evaluate_playlist_with_fastapi,
+        inputs=[youtube_url_input, max_comments_input, max_workers_input],
+        outputs=[overall_quality_output, composite_quality_output, composite_score_output,
+                 percent_good_videos_output, positive_ratio_output, negative_ratio_output]
+    )
+# Start the UI at import time (there is no `if __name__ == "__main__"` guard).
+demo.launch(debug=True)

main.py ADDED Viewed

	@@ -0,0 +1,302 @@

+import logging  # new
+import pickle
+import random  # new
+import re
+import string
+from concurrent.futures import ThreadPoolExecutor  # new
+from datetime import datetime
+from typing import Optional
+from urllib.parse import urlparse, parse_qs
+
+import emoji
+import nltk
+import numpy as np
+import pandas as pd
+import yt_dlp
+from fake_useragent import UserAgent  # new
+from fastapi import FastAPI, HTTPException
+from nltk.corpus import stopwords
+from pydantic import BaseModel
+from youtube_comment_downloader import YoutubeCommentDownloader
+# --- إعدادات NLTK و Logging ---
+# ضبط مسار بيانات NLTK ليطابق Dockerfile
+nltk.data.path.append('/app/nltk_data')
+try:
+    # سيتم تنزيلها مرة واحدة في Dockerfile، ولكن هذا يضمن أنها متاحة
+    nltk.download('stopwords', quiet=True)
+except Exception as e:
+    logging.error(f"Failed to download NLTK stopwords: {e}")
+    # التعامل مع الخطأ إذا لم يتمكن من التنزيل (مثلاً إذا لم يكن المسار صحيحاً أو صلاحيات)
+arabic_stopwords = set(stopwords.words('arabic'))
+# --- إعداد التسجيل ---
+logging.basicConfig(
+    filename='youtube_scraper.log',
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+app = FastAPI()
+# Load the models. Both names stay None when loading fails, so request handlers
+# can detect that a model is unavailable.
+# NOTE(review): pickle.load executes code embedded in the file; these .pkl files
+# must only ever come from a trusted source.
+loaded_quality_model = None
+loaded_sentiment_pipeline = None
+try:
+    with open('final_youtube_quality_model.pkl', 'rb') as f:
+        loaded_quality_model = pickle.load(f)
+    logging.info("تم تحميل نموذج جودة الفيديو بنجاح.")
+except FileNotFoundError:
+    logging.error("خطأ: لم يتم العثور على ملف النموذج 'final_youtube_quality_model.pkl'.")
+except Exception as e:
+    # Any other failure is logged and the model is left as None.
+    logging.error(f"خطأ غير متوقع أثناء تحميل نموذج جودة الفيديو: {e}")
+try:
+    with open('best_sentiment_pipeline.pkl', 'rb') as f:
+        loaded_sentiment_pipeline = pickle.load(f)
+    logging.info("تم تحميل نموذج تصنيف المشاعر بنجاح.")
+except FileNotFoundError:
+    logging.error("خطأ: لم يتم العثور على ملف النموذج 'best_sentiment_pipeline.pkl'.")
+except Exception as e:
+    # Any other failure is logged and the pipeline is left as None.
+    logging.error(f"خطأ غير متوقع أثناء تحميل نموذج تحليل المشاعر: {e}")
+# --- إعدادات User-Agent لـ yt-dlp (جديد) ---
+ua = UserAgent()
+def get_desktop_user_agent():
+    while True:
+        candidate = random.choice([ua.chrome, ua.firefox, ua.safari])
+        if all(x not in candidate for x in ['Mobile', 'Android', 'iPhone', 'iPad']):
+            return candidate
+# Chosen once at import time and reused for every yt-dlp call that uses these options.
+selected_user_agent = get_desktop_user_agent()
+headers = {'User-Agent': selected_user_agent}
+# yt-dlp options passed to YoutubeDL when fetching per-video metadata.
+ydl_opts_video_info = {
+    'quiet': True,
+    'skip_download': True,
+    'extract_flat': True,
+    'ignoreerrors': True,
+    'no_warnings': True,
+    'age_limit': 18,
+    'force_generic_extractor': False,
+    'http_headers': headers
+}
+# --- Request model ---
+# NOTE(review): nothing in the visible code references this model, and its field is
+# named `playlist_url` while the endpoint parameter is `youtube_url` - confirm
+# whether it is still needed.
+class PlaylistRequest(BaseModel):
+    playlist_url: str
+# --- دالة استخراج ID الفيديو ---
+def extract_video_id(url):
+    if 'youtu.be/' in url:
+        return url.split('/')[-1].split('?')[0]
+    elif 'watch?v=' in url:
+        return parse_qs(urlparse(url).query).get('v', [None])[0]
+    return None
+# # --- تنظيف التعليقات ---
+# arabic_stopwords = set(stopwords.words('arabic'))
+def preprocess_text(text):
+    if not isinstance(text, str):
+        return ""
+    text = emoji.demojize(text)
+    text = re.sub(r'http\S+', '', text)
+    text = text.translate(str.maketrans('', '', string.punctuation + string.digits))
+    text = text.lower()
+    text = re.sub(r'\s+', ' ', text).strip()
+    text_tokens = text.split()
+    filtered_text = [word for word in text_tokens if word not in arabic_stopwords]
+    return ' '.join(filtered_text)
+# --- معالجة فيديو واحد فقط (نسخة 2) ---
+def process_single_video2(video_url, loaded_quality_model, loaded_sentiment_pipeline, max_comments_per_video=50):
+    downloader = YoutubeCommentDownloader()
+    video_id = extract_video_id(video_url)
+    if not video_id:
+        logging.warning(f"رابط فيديو غير صالح: {video_url}. تم تجاهله.")
+        return None
+    try:
+        with yt_dlp.YoutubeDL(ydl_opts_video_info) as ydl:
+            info_dict = ydl.extract_info(video_url, download=False)
+            # --- تسجيل البيانات المستخلصة ---
+            logging.info(f"[فيديو: {video_url}] تم استخراج البيانات: {info_dict.keys()}")
+            if not info_dict or info_dict.get('is_live', False) or info_dict.get('age_limit', 0) > 0:
+                logging.warning(f"لا يمكن معالجة الفيديو {video_id}: مباشر أو مقيد عمرًا. تم تجاهله.")
+                return None
+            views = info_dict.get('view_count', 0)
+            likes = info_dict.get('like_count', 0)
+            # --- تسجيل المشاهدات والإعجابات ---
+            logging.info(f"[فيديو: {video_url}] المشاهدات: {views}, الإعجابات: {likes}")
+            upload_date = info_dict.get('upload_date', 'Unknown')
+            publish_year = int(upload_date[:4]) if upload_date != 'Unknown' else datetime.datetime.now().year
+        # --- جلب التعليقات ---
+        sampled_comments = []
+        try:
+            for comment in downloader.get_comments_from_url(video_url):
+                if 'text' in comment:
+                    sampled_comments.append(comment['text'])
+                    if len(sampled_comments) >= max_comments_per_video:
+                        break
+        except Exception as e:
+            logging.warning(f"فشل في جلب التعليقات للفيديو {video_id}. السبب: {e}.")
+            sampled_comments = []
+        like_view_ratio = likes / views if views > 0 else 0.0
+        comment_view_ratio = len(sampled_comments) / views if views > 0 else 0.0
+        engagement_score = like_view_ratio + comment_view_ratio
+        # --- تحليل التعليقات ---
+        positive_comments = 0
+        negative_comments = 0
+        overall_sentiment = "لا توجد تعليقات كافية"
+        if sampled_comments:
+            processed_comments = [preprocess_text(c) for c in sampled_comments]
+            sentiment_predictions = loaded_sentiment_pipeline.predict(processed_comments)
+            positive_comments = np.sum(sentiment_predictions == 1)
+            negative_comments = np.sum(sentiment_predictions == 0)
+            if positive_comments > negative_comments:
+                overall_sentiment = "إيجابي"
+            elif negative_comments > positive_comments:
+                overall_sentiment = "سلبي"
+            else:
+                overall_sentiment = "محايد"
+        input_df = pd.DataFrame([[views, likes, len(sampled_comments), 0, publish_year,
+                                  like_view_ratio, comment_view_ratio, engagement_score]],
+                                columns=['views_count', 'likes_count', 'comments_count',
+                                         'video_duration_seconds', 'publish_year',
+                                         'like_view_ratio', 'comment_view_ratio', 'engagement_score'])
+        playlist_quality = "لم يتم التقييم"
+        try:
+            prediction_numeric = loaded_quality_model.predict(input_df)[0]
+            logging.info(f"[فيديو: {video_url}] نتيجة التنبؤ: {prediction_numeric}")  # تسجيل النتيجة
+            playlist_quality = "جيد" if prediction_numeric == 1 else "سيء"
+        except Exception as e:
+            playlist_quality = f"خطأ في التقييم: {e}"
+            logging.error(f"[فيديو: {video_url}] خطأ في تقييم الفيديو: {e}")
+        return {
+            "video_url": video_url,
+            "views": views,
+            "likes": likes,
+            "comments": len(sampled_comments),
+            "like_view_ratio": like_view_ratio,
+            "comment_view_ratio": comment_view_ratio,
+            "engagement_score": engagement_score,
+            "quality": playlist_quality,
+            "sentiment": overall_sentiment,
+            "positive_comments": positive_comments,
+            "negative_comments": negative_comments
+        }
+    except Exception as e:
+        logging.error(f"حدث خطأ في الفيديو {video_url}: {e}")
+        return None
+# --- نفس الدالة ولكن صحيحة مع Pydantic (بعد التعديلات) ---
+@app.post("/evaluate_youtube_playlist_individually_same_method2/")
+async def evaluate_youtube_playlist_individually_same_method2(youtube_url, max_comments_per_video=50, max_workers=3):
+    """
+    تقييم قائمة تشغيل يوتيوب باستخدام نظام مركب (نسخة 2)
+    """
+    if loaded_quality_model is None or loaded_sentiment_pipeline is None:
+        logging.error("لم يتم تحميل النماذج المطلوبة.")
+        return {"error": "لم يتم تحميل النماذج المطلوبة."}
+    video_links = []
+    try:
+        with yt_dlp.YoutubeDL({'extract_flat': True, 'quiet': True, 'playlist_items': '1:10'}) as ydl:
+            playlist_info = ydl.extract_info(youtube_url, download=False)
+            if 'entries' in playlist_info:
+                for entry in playlist_info['entries'][:10]:
+                    if entry and 'url' in entry:
+                        video_links.append(entry['url'])
+            else:
+                logging.warning("لا توجد فيديوهات في هذه القائمة.")
+                return {"error": "لا توجد فيديوهات في هذه القائمة."}
+    except Exception as e:
+        logging.error(f"فشل في جلب روابط الفيديو: {e}")
+        return {"error": f"فشل في جلب روابط الفيديو: {e}"}
+    individual_results = []
+    # --- معالجة الفيديوهات بالتوازي ---
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        futures = [
+            executor.submit(
+                process_single_video2,
+                video_url,
+                loaded_quality_model,
+                loaded_sentiment_pipeline,
+                max_comments_per_video
+            ) for video_url in video_links
+        ]
+        for future in tqdm(futures, total=len(video_links), desc="معالجة الفيديوهات"):
+            result = future.result()
+            if result:
+                individual_results.append(result)
+    # --- الإحصاء النهائي ---
+    num_good_videos = sum(1 for r in individual_results if r['quality'] == 'جيد')
+    total_positive_comments = sum(r['positive_comments'] for r in individual_results)
+    total_negative_comments = sum(r['negative_comments'] for r in individual_results)
+    total_classified_comments = total_positive_comments + total_negative_comments
+    total_videos = len(individual_results)
+    percent_good_videos = (num_good_videos / total_videos) * 100 if total_videos > 0 else 0
+    # --- حساب التقييم العام ---
+    if percent_good_videos >= 70:
+        overall_quality = "جيد جداً"
+    elif percent_good_videos >= 50:
+        overall_quality = "جيد"
+    else:
+        overall_quality = "سيء"
+    # --- النظام المركب ---
+    WEIGHT_QUALITY = 0.6
+    WEIGHT_SENTIMENT = 0.4
+    positive_ratio = (total_positive_comments / total_classified_comments) * 100 if total_classified_comments > 0 else 0.0
+    composite_score = (WEIGHT_QUALITY * percent_good_videos) + (WEIGHT_SENTIMENT * positive_ratio)
+    if composite_score >= 75:
+        composite_quality = "جيد جداً"
+    elif composite_score >= 60:
+        composite_quality = "جيد"
+    elif composite_score >= 45:
+        composite_quality = "متوسط"
+    else:
+        composite_quality = "سيء"
+    return {
+        "overall_quality": overall_quality,
+        "composite_quality": composite_quality,
+        "composite_score": round(composite_score, 1),
+        "percent_good_videos": round(percent_good_videos, 1),
+        "positive_ratio": round(positive_ratio, 1),
+        "negative_ratio": round(100 - positive_ratio, 1),
+    }

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+fastapi
+uvicorn[standard]
+python-multipart
+yt-dlp
+youtube-comment-downloader
+nltk
+emoji
+pandas
+numpy
+# استخدم الإصدار الدقيق الذي دربت عليه النماذج
+scikit-learn==1.6.1
+gunicorn
+fake-useragent  # <--- جديد
+tqdm
+gradio
+requests