Spaces:
Sleeping
Sleeping
| import re | |
| import os | |
| import gradio as gr | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| from PIL import Image | |
| from io import BytesIO | |
| from googleapiclient.discovery import build | |
| from transformers import pipeline | |
| from langdetect import detect | |
# -----------------------------
# Load sentiment model
# -----------------------------
# Multilingual star-rating model: emits labels like "4 stars" (1-5),
# which are mapped to Négatif/Neutre/Positif further down.
sentiment = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
)

# YouTube Data API key, injected via Hugging Face Spaces secrets
# (None when the secret is not configured — checked before fetching).
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY")
| # ----------------------------- | |
| # Helper functions | |
| # ----------------------------- | |
def truncate_text(text, max_len=512):
    """Trim *text* to at most *max_len* whitespace-separated words.

    Used as a guard before feeding comments to the BERT pipeline.
    NOTE(review): the limit is counted in words, not model subword
    tokens, so an unusually long comment could still exceed the
    model's 512-token window — confirm against the tokenizer.
    """
    words = text.split()
    if len(words) <= max_len:
        return text
    return " ".join(words[:max_len])
def extract_video_id(url: str) -> str:
    """Extract the 11-character YouTube video id from *url*.

    Handles watch URLs (``?v=...``), ``youtu.be`` short links,
    ``/embed/``, ``/shorts/`` and ``/live/`` paths.  Falls back to
    returning *url* unchanged so a bare video id may be passed
    directly instead of a full URL.
    """
    pattern = r"(?:v=|youtu\.be/|embed/|shorts/|live/)([a-zA-Z0-9_-]{11})"
    match = re.search(pattern, url)
    return match.group(1) if match else url
def fetch_comments(video_id, max_results=50):
    """Fetch up to *max_results* top-level comments for *video_id*.

    The commentThreads endpoint caps a single page at 100 items, but
    the UI slider allows up to 200 — so paginate with nextPageToken
    until enough comments are collected or the pages run out.

    Returns a list of dicts with keys ``author``, ``text``,
    ``publishedAt`` and ``likeCount``.  May raise googleapiclient
    errors (quota, invalid id, comments disabled); callers handle that.
    """
    youtube = build("youtube", "v3", developerKey=YOUTUBE_API_KEY)
    comments_data = []
    page_token = None
    while len(comments_data) < max_results:
        request = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            textFormat="plainText",
            maxResults=min(max_results - len(comments_data), 100),
            pageToken=page_token,
        )
        response = request.execute()
        for item in response.get("items", []):
            snippet = item["snippet"]["topLevelComment"]["snippet"]
            comments_data.append({
                "author": snippet.get("authorDisplayName"),
                "text": snippet.get("textDisplay"),
                "publishedAt": snippet.get("publishedAt"),
                "likeCount": snippet.get("likeCount"),
            })
        page_token = response.get("nextPageToken")
        if not page_token:
            break
    return comments_data[:max_results]
def is_french(text: str) -> bool:
    """Return True when langdetect identifies *text* as French.

    langdetect raises (LangDetectException) on empty or undetectable
    input; any failure is treated as "not French" so one bad comment
    never aborts the whole run.  The original bare ``except:`` also
    swallowed KeyboardInterrupt/SystemExit — narrowed to Exception.
    """
    try:
        return detect(text) == "fr"
    except Exception:  # best-effort language filter
        return False
def map_star_to_label(star: int) -> str:
    """Map a 1-5 star rating from the nlptown model to a French label."""
    if star >= 4:
        return "Positif"
    if star == 3:
        return "Neutre"
    return "Négatif"
def plot_comments_over_time(df):
    """Scatter each comment by publication day, colour-coded by sentiment.

    Expects *df* to have "Publié le" and "Sentiment" columns; adds a
    "day" column in place.  Renders the figure to an in-memory PNG and
    returns it as a PIL Image (nothing is written to disk).
    """
    label_colors = {"Positif": "green", "Neutre": "orange", "Négatif": "red"}
    df["day"] = pd.to_datetime(df["Publié le"]).dt.date

    plt.figure(figsize=(10, 4))
    for sentiment_label in df["Sentiment"].unique():
        rows = df[df["Sentiment"] == sentiment_label]
        plt.scatter(
            rows["day"],
            [sentiment_label] * len(rows),
            color=label_colors[sentiment_label],
            label=sentiment_label,
            alpha=0.7,
        )
    plt.xlabel("Date")
    plt.ylabel("Sentiment")
    plt.title("Commentaires par jour avec sentiment")
    plt.legend()
    plt.xticks(rotation=45)
    plt.tight_layout()

    buffer = BytesIO()
    plt.savefig(buffer, format="png")
    plt.close()
    buffer.seek(0)
    return Image.open(buffer)
| # ----------------------------- | |
| # Main function | |
| # ----------------------------- | |
def analyze_comments(video_url, max_comments, uploaded_file):
    """Run sentiment analysis on comments from a file or a YouTube URL.

    Returns a 4-tuple matching the Gradio outputs:
    (status message, formatted results text, Excel file path, plot image).
    On any failure the last three slots are None and the message says why.
    """
    # Acquire a DataFrame with at least a "Commentaire" column, from
    # whichever input the user supplied (the uploaded file wins).
    if uploaded_file is not None:
        df_input, error = _load_comments_file(uploaded_file)
    else:
        df_input, error = _fetch_french_comments(video_url, max_comments)
    if error:
        return error, None, None, None

    # -----------------------------
    # Sentiment analysis (with safety truncation)
    # -----------------------------
    texts = [truncate_text(t) for t in df_input["Commentaire"].astype(str)]
    results = sentiment(texts)

    labels = []
    scores = []
    for res in results:
        star = int(res["label"].split()[0])  # model labels look like "4 stars"
        labels.append(map_star_to_label(star))
        scores.append(round(res["score"], 3))
    df_input["Sentiment"] = labels
    df_input["Score"] = scores

    formatted = [
        f"📝 {row['Commentaire']}\n➡️ {row['Sentiment']} ({row['Score']})"
        for _, row in df_input.iterrows()
    ]

    excel_path = "comments_sentiment.xlsx"
    df_input.to_excel(excel_path, index=False)
    plot_img = plot_comments_over_time(df_input)
    return "", "\n\n".join(formatted), excel_path, plot_img


def _load_comments_file(uploaded_file):
    """Read an uploaded CSV/XLSX into a DataFrame; return (df, error_msg)."""
    try:
        if uploaded_file.name.endswith(".xlsx"):
            df_input = pd.read_excel(uploaded_file.name)
        else:
            df_input = pd.read_csv(uploaded_file.name)
    except Exception as e:
        return None, f"Erreur lors de la lecture du fichier : {e}"
    if "Commentaire" not in df_input.columns:
        return None, "Le fichier doit contenir une colonne 'Commentaire'."
    # Fill optional columns so downstream code can rely on their presence.
    df_input["Auteur"] = df_input.get("Auteur", "Inconnu")
    df_input["Publié le"] = df_input.get("Publié le", pd.Timestamp.now())
    return df_input, None


def _fetch_french_comments(video_url, max_comments):
    """Fetch YouTube comments and keep French ones; return (df, error_msg)."""
    if not YOUTUBE_API_KEY:
        return None, "⚠️ API key manquante."
    video_id = extract_video_id(video_url)
    try:
        comments_data = fetch_comments(video_id, max_results=max_comments)
    except Exception as e:  # invalid id, comments disabled, quota, network…
        # Previously this propagated and crashed the Gradio handler;
        # surface it like every other failure path instead.
        return None, f"Erreur API YouTube : {e}"
    if not comments_data:
        return None, "Aucun commentaire trouvé.", 
    french_comments = [c for c in comments_data if is_french(c["text"])]
    if not french_comments:
        return None, "Aucun commentaire en français détecté."
    df = pd.DataFrame({
        "Auteur": [c["author"] for c in french_comments],
        "Commentaire": [c["text"] for c in french_comments],
        "Publié le": [c["publishedAt"] for c in french_comments],
        "Likes": [c["likeCount"] for c in french_comments],
    })
    return df, None
| # ----------------------------- | |
| # Gradio UI | |
| # ----------------------------- | |
# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown("## 📊 Analyse de sentiments des commentaires YouTube OU d'un fichier CSV/XLSX (FR)")

    # Inputs: either a YouTube URL or an uploaded comments file.
    with gr.Row():
        video_url = gr.Textbox(label="URL de la vidéo YouTube")
        uploaded_file = gr.File(
            label="Ou téléversez un fichier CSV/XLSX",
            file_types=[".csv", ".xlsx"],
        )
    max_comments = gr.Slider(
        label="Nombre de commentaires YouTube à analyser",
        minimum=10,
        maximum=200,
        step=10,
        value=50,
    )

    # Outputs: status message, per-comment results, Excel download, plot.
    output_msg = gr.Textbox(label="Messages", interactive=False)
    output_results = gr.Textbox(label="Résultats", lines=15, interactive=False)
    output_file = gr.File(label="Télécharger le fichier Excel")
    output_plot = gr.Image(label="Commentaires par jour")

    analyze_btn = gr.Button("Analyser les commentaires")
    analyze_btn.click(
        fn=analyze_comments,
        inputs=[video_url, max_comments, uploaded_file],
        outputs=[output_msg, output_results, output_file, output_plot],
    )

demo.launch()