"""CommentSurvey AI: poll and sentiment analysis of YouTube comments.

Pipeline: download comments for a YouTube URL, send them to a Groq-hosted
LLM that returns a JSON poll/sentiment breakdown, then render the result as
Plotly charts, a table of labeled comments and a downloadable PDF report
inside a Gradio UI.
"""
import json
import logging
import os
import re
import tempfile
import warnings

import gradio as gr
import pandas as pd
import plotly.express as px
from fpdf import FPDF
from groq import Groq
from youtube_comment_downloader import SORT_BY_POPULAR, YoutubeCommentDownloader

warnings.filterwarnings("ignore")

logger = logging.getLogger(__name__)

# ====================== CONFIG ======================
# On Hugging Face, set 'GROQ_API_KEY' in the "Variables and Secrets" settings tab.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# ====================== SYSTEM PROMPT ======================
SYSTEM_PROMPT = """
You are an expert social media sentiment and poll analysis AI.
Focus on Yes/No, Agree/Disagree, Support/Oppose, and sentiment.
Handle English + Urdu + Hindi + other languages well.
Return ONLY valid JSON in this exact format:
{
  "main_poll": {
    "question": "Suggested poll question",
    "yes_count": int,
    "no_count": int,
    "agree_count": int,
    "disagree_count": int,
    "support_count": int,
    "oppose_count": int,
    "neutral_count": int
  },
  "sentiment": {
    "positive": float,
    "negative": float,
    "neutral": float
  },
  "top_themes": ["theme1", "theme2"],
  "summary": "Short professional summary",
  "labeled_comments": [
    {"comment": "...", "opinion": "Yes|No|Agree|Disagree|Positive|Negative|Neutral|Mixed"}
  ]
}
"""

# ====================== HELPERS ======================
# Compiled once at import time; clean_text runs once per comment.
_BULLET_RE = re.compile(r'[\u2022\u2023\u25CF\u25BA\u25C4]')
_SMART_QUOTE_RE = re.compile(r'[\u2018\u2019\u201C\u201D]')
_DASH_RE = re.compile(r'[\u2013\u2014]')
_CONTROL_RE = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')

_YOUTUBE_ID_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r'youtu\.be/([a-zA-Z0-9_-]+)',
        r'v=([a-zA-Z0-9_-]+)',
        r'/embed/([a-zA-Z0-9_-]+)',
        r'/shorts/([a-zA-Z0-9_-]+)',
    )
)


def clean_text(text, latin1_only=True):
    """Normalize typographic characters and strip control characters.

    Bullets and en/em dashes become ``-``, curly quotes become ``"``, and
    ASCII control characters (except tab, LF and CR) are removed.

    Args:
        text: Value to clean. Falsy values yield ``""``; other values are
            converted with ``str()`` first.
        latin1_only: When true (the default), every character that cannot be
            encoded as Latin-1 is dropped so the result is safe for the PDF
            core fonts. Pass ``False`` for text sent to the LLM, otherwise
            Urdu/Hindi and other non-Latin scripts are erased.

    Returns:
        The cleaned string.
    """
    if not text:
        return ""
    cleaned = str(text)
    cleaned = _BULLET_RE.sub('-', cleaned)
    cleaned = _SMART_QUOTE_RE.sub('"', cleaned)
    cleaned = _DASH_RE.sub('-', cleaned)
    cleaned = _CONTROL_RE.sub('', cleaned)
    if latin1_only:
        # Ensure PDF compatibility: drop anything outside Latin-1.
        cleaned = cleaned.encode('latin-1', 'ignore').decode('latin-1')
    return cleaned


def extract_youtube_id(url):
    """Return the video id found in a YouTube URL, or ``None``.

    Recognizes ``youtu.be/<id>``, ``v=<id>``, ``/embed/<id>`` and
    ``/shorts/<id>`` forms, tried in that order.
    """
    if not url or not isinstance(url, str):
        return None
    for pattern in _YOUTUBE_ID_PATTERNS:
        match = pattern.search(url)
        if match:
            return match.group(1)
    return None


def fetch_youtube_comments(url, limit=100):
    """Download up to ``limit`` comment texts for the video at ``url``.

    Comments are requested in "popular" order. The original call passed
    ``sort_by=1`` under a comment claiming that value meant "Top"; in
    youtube-comment-downloader 0 is popular and 1 is recent, so the named
    constant is used to get the intended ordering.

    Args:
        url: A YouTube video URL.
        limit: Maximum number of comments to return.

    Returns:
        A list of comment strings. Empty when the URL has no recognizable
        video id or ``limit`` is not positive. If the download fails partway,
        the comments collected so far are returned and the error is logged.
    """
    video_id = extract_youtube_id(url)
    if not video_id or limit <= 0:
        return []

    comments = []
    try:
        downloader = YoutubeCommentDownloader()
        for comment in downloader.get_comments(video_id, sort_by=SORT_BY_POPULAR):
            # Skip malformed or empty entries instead of aborting the fetch.
            text = comment.get('text')
            if not text:
                continue
            comments.append(text)
            if len(comments) >= limit:
                break
    except Exception as exc:  # best-effort boundary around third-party I/O
        logger.exception("Fetch error: %s", exc)
    return comments


def analyze_comments_with_groq(comments, post_context=""):
    """Ask the Groq chat model for a poll/sentiment analysis of ``comments``.

    Each comment is cleaned (keeping non-Latin scripts), truncated to 200
    characters and labeled ``C1``, ``C2``, ... in the user message.

    Args:
        comments: Iterable of comment strings.
        post_context: Optional free-text context placed before the comments.

    Returns:
        The parsed JSON object as a ``dict``, or ``None`` when the API key is
        missing, the request or JSON parsing fails, or the reply is not a
        JSON object.
    """
    if not GROQ_API_KEY:
        return None

    comments_text = "\n\n".join(
        f"C{index}: {clean_text(comment, latin1_only=False)[:200]}"
        for index, comment in enumerate(comments, start=1)
    )
    try:
        client = Groq(api_key=GROQ_API_KEY)
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": f"Context: {post_context}\n\nComments:\n{comments_text}"},
            ],
            temperature=0.2,
            response_format={"type": "json_object"},
        )
        result = json.loads(response.choices[0].message.content)
    except Exception as exc:  # network, API and JSON errors all mean "no result"
        logger.exception("Groq Error: %s", exc)
        return None
    return result if isinstance(result, dict) else None


def create_pdf_report(analysis_result, poll_question):
    """Write a one-page PDF with the poll question and summary.

    Both the question and the summary pass through ``clean_text`` because
    the PDF is written with the built-in Arial font and the text comes from
    the LLM, so it may contain characters outside Latin-1.

    Args:
        analysis_result: Analysis dict; only its ``'summary'`` key is read.
        poll_question: Question shown in the header (truncated to 60 chars).

    Returns:
        Path of the generated PDF. A unique temporary file is created per
        call so concurrent sessions do not overwrite each other's report.
        The file is not deleted by this function.
    """
    question = clean_text(poll_question)[:60]
    summary = clean_text(analysis_result.get('summary')) or 'N/A'

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, 'CommentSurvey AI Report', 0, 1, 'C')
    pdf.ln(10)
    pdf.set_font('Arial', 'B', 12)
    pdf.cell(0, 10, f"Question: {question}", 0, 1, 'L')
    pdf.set_font('Arial', '', 11)
    pdf.multi_cell(0, 7, summary)

    fd, path = tempfile.mkstemp(prefix="report_", suffix=".pdf")
    os.close(fd)  # FPDF reopens the path itself
    pdf.output(path)
    return path


# ====================== LOGIC ======================
def _as_count(value):
    """Coerce an LLM-supplied count to a non-negative int (0 if unusable)."""
    try:
        return max(int(value), 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def _as_score(value):
    """Coerce an LLM-supplied sentiment score to float (0.0 if unusable)."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _as_dict(value):
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _failure(message):
    """Build the six-output tuple shown in the UI when analysis cannot run."""
    return None, message, None, None, None, None


def analyze(url):
    """Run the full pipeline for one URL and return the six UI outputs.

    Args:
        url: YouTube video URL entered by the user.

    Returns:
        ``(dataframe, status, poll_figure, sentiment_figure, summary_markdown,
        pdf_path)``. On failure only ``status`` is set and the rest are
        ``None``. ``pdf_path`` is ``None`` if only the PDF step failed.
    """
    if not GROQ_API_KEY:
        return _failure("❌ API Key Missing in Hugging Face Secrets")

    comments = fetch_youtube_comments(url)
    if not comments:
        return _failure("❌ Failed to fetch comments.")

    result = analyze_comments_with_groq(comments)
    if not result:
        return _failure("❌ AI Analysis failed.")

    # The model may return null or wrongly-typed fields, so every value is
    # validated before it reaches arithmetic or plotting.
    main = _as_dict(result.get('main_poll'))
    poll_values = [
        sum(_as_count(main.get(key)) for key in ('yes_count', 'agree_count', 'support_count')),
        sum(_as_count(main.get(key)) for key in ('no_count', 'disagree_count', 'oppose_count')),
        _as_count(main.get('neutral_count')),
    ]
    fig_poll = px.pie(
        names=['Yes/Agree/Support', 'No/Disagree/Oppose', 'Neutral'],
        values=poll_values,
        title="Poll Distribution",
        hole=0.4,
    )

    sent = _as_dict(result.get('sentiment'))
    sentiment_labels = ['Positive', 'Negative', 'Neutral']
    fig_sent = px.bar(
        x=sentiment_labels,
        y=[_as_score(sent.get(key)) for key in ('positive', 'negative', 'neutral')],
        title="Sentiment Score",
        color=sentiment_labels,
    )

    labeled = result.get('labeled_comments')
    df = pd.DataFrame(labeled if isinstance(labeled, list) else [])

    question = main.get('question') or 'Analysis'
    summary = result.get('summary') or ''

    try:
        pdf_path = create_pdf_report(result, question)
    except Exception as exc:  # keep the on-screen analysis if the PDF fails
        logger.exception("PDF error: %s", exc)
        pdf_path = None

    summary_md = f"### 📝 {question}\n{summary}"
    return df, "✅ Analysis Complete", fig_poll, fig_sent, summary_md, pdf_path


# ====================== UI ======================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📊 CommentSurvey AI")
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
        btn = gr.Button("Analyze", variant="primary")
    status = gr.Markdown("Status: Ready")
    with gr.Tabs():
        with gr.Tab("Summary"):
            sum_md = gr.Markdown()
            with gr.Row():
                p1 = gr.Plot()
                p2 = gr.Plot()
        with gr.Tab("Data"):
            table = gr.Dataframe()
    report_file = gr.File(label="Download PDF Report")

    # Output order must match the tuple returned by analyze().
    btn.click(analyze, inputs=[url_input], outputs=[table, status, p1, p2, sum_md, report_file])

if __name__ == "__main__":
    demo.launch()