# Hugging Face Space (status shown on the page when captured: Sleeping)
import json
import os
import re
import warnings
from itertools import islice

import gradio as gr
import pandas as pd
import plotly.express as px
from fpdf import FPDF
from groq import Groq
from youtube_comment_downloader import YoutubeCommentDownloader
# Suppress every Python warning process-wide.
warnings.filterwarnings(action="ignore")

# ====================== CONFIG ======================
# On Hugging Face, set 'GROQ_API_KEY' in the "Variables and Secrets" settings tab.
# The value is None when the environment variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# ====================== SYSTEM PROMPT ======================
# System message sent with every Groq request. It pins the JSON schema that
# the rest of the app reads: main_poll, sentiment, top_themes, summary and
# labeled_comments.
SYSTEM_PROMPT = """
You are an expert social media sentiment and poll analysis AI.
Focus on Yes/No, Agree/Disagree, Support/Oppose, and sentiment.
Handle English + Urdu + Hindi + other languages well.
Return ONLY valid JSON in this exact format:
{
"main_poll": {
"question": "Suggested poll question",
"yes_count": int,
"no_count": int,
"agree_count": int,
"disagree_count": int,
"support_count": int,
"oppose_count": int,
"neutral_count": int
},
"sentiment": {
"positive": float,
"negative": float,
"neutral": float
},
"top_themes": ["theme1", "theme2"],
"summary": "Short professional summary",
"labeled_comments": [
{"comment": "...", "opinion": "Yes|No|Agree|Disagree|Positive|Negative|Neutral|Mixed"}
]
}
"""
# ====================== HELPERS ======================
def clean_text(text):
    """Reduce *text* to Latin-1 characters for PDF compatibility.

    Typographic punctuation is first mapped to its closest ASCII equivalent so
    that it survives the final Latin-1 filter instead of being dropped:

    * bullets / arrows (U+2022, U+2023, U+25CF, U+25BA, U+25C4) -> ``-``
    * curly single quotes (U+2018, U+2019) -> ``'``
    * curly double quotes (U+201C, U+201D) -> ``"``
    * en / em dashes (U+2013, U+2014) -> ``-``

    ASCII control characters other than tab, LF and CR are removed, and every
    remaining character that Latin-1 cannot encode is silently discarded.

    Args:
        text: The string to sanitise; ``None`` and ``""`` are accepted.

    Returns:
        The sanitised string, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""
    text = re.sub(r'[\u2022\u2023\u25CF\u25BA\u25C4]', '-', text)
    # Apostrophes must stay apostrophes; mapping them to a double quote
    # would corrupt contractions and possessives.
    text = re.sub(r'[\u2018\u2019]', "'", text)
    text = re.sub(r'[\u201C\u201D]', '"', text)
    text = re.sub(r'[\u2013\u2014]', '-', text)
    text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', text)
    # Drop whatever Latin-1 cannot represent.
    return text.encode('latin-1', 'ignore').decode('latin-1')
def extract_youtube_id(url):
    """Extract the video ID from a YouTube URL.

    Recognised shapes, tried in this order: ``youtu.be/<id>``, ``v=<id>``
    (watch URLs), ``/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>``.

    Args:
        url: The URL (or any text containing one). ``None``, non-string and
            empty values are tolerated.

    Returns:
        The first matched ID, or ``None`` when nothing matches.
    """
    # re.search raises TypeError on None / non-string input.
    if not isinstance(url, str) or not url:
        return None
    patterns = (
        r'youtu\.be/([a-zA-Z0-9_-]+)',
        r'v=([a-zA-Z0-9_-]+)',
        r'/embed/([a-zA-Z0-9_-]+)',
        r'/shorts/([a-zA-Z0-9_-]+)',
        r'/live/([a-zA-Z0-9_-]+)',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
def fetch_youtube_comments(url, limit=100):
    """Fetch up to *limit* top ("popular") comments of a YouTube video.

    Args:
        url: YouTube URL; the video ID is resolved with ``extract_youtube_id``.
        limit: Maximum number of comments to return. Values <= 0 yield ``[]``.

    Returns:
        A list of comment text strings. The list is empty when no video ID can
        be extracted from *url* or when anything fails while downloading (the
        error is printed, not raised).
    """
    try:
        video_id = extract_youtube_id(url)
        if not video_id:
            return []
        downloader = YoutubeCommentDownloader()
        # In youtube_comment_downloader, sort_by=0 is SORT_BY_POPULAR ("Top")
        # and sort_by=1 is SORT_BY_RECENT ("Newest").
        comment_stream = downloader.get_comments(video_id, sort_by=0)
        # islice stops pulling from the generator once the cap is reached.
        return [item['text'] for item in islice(comment_stream, max(limit, 0))]
    except Exception as e:  # best-effort boundary: any scraping failure -> []
        print(f"Fetch error: {e}")
        return []
# Same control-character set that clean_text removes (tab, LF and CR are kept).
_PROMPT_CONTROL_CHARS = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]')


def _prompt_safe(comment, max_chars=200):
    """Strip control characters from *comment* and cap its length, keeping every script."""
    if not comment:
        return ""
    return _PROMPT_CONTROL_CHARS.sub('', str(comment))[:max_chars]


def analyze_comments_with_groq(comments, post_context=""):
    """Send *comments* to the Groq chat model and return its parsed JSON reply.

    Each comment is labelled ``C1``, ``C2``, ... and truncated to 200
    characters. Comments are NOT passed through ``clean_text``: that helper
    discards everything outside Latin-1, which would erase Urdu/Hindi text
    before the model ever sees it.

    Args:
        comments: Iterable of comment strings.
        post_context: Optional free-text context placed before the comments.

    Returns:
        The decoded JSON object as a ``dict``, or ``None`` when the API key is
        missing, there are no comments, the request fails, or the reply is not
        a JSON object (the error is printed, not raised).
    """
    if not GROQ_API_KEY or not comments:
        return None
    try:
        client = Groq(api_key=GROQ_API_KEY)
        comments_text = "\n\n".join(
            f"C{index}: {_prompt_safe(comment)}"
            for index, comment in enumerate(comments, start=1)
        )
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": f"Context: {post_context}\n\nComments:\n{comments_text}"},
            ],
            temperature=0.2,
            response_format={"type": "json_object"},
        )
        parsed = json.loads(response.choices[0].message.content)
        # Callers use .get() on the result, so anything but an object is a failure.
        return parsed if isinstance(parsed, dict) else None
    except Exception as e:  # best-effort boundary: network / API / JSON errors
        print(f"Groq Error: {e}")
        return None
def create_pdf_report(analysis_result, poll_question):
    """Write a one-page PDF containing the poll question and the summary.

    Args:
        analysis_result: Mapping returned by the analysis step; only its
            ``'summary'`` entry is read.
        poll_question: Question shown under the title. It is sanitised with
            ``clean_text`` and then cut to 60 characters; ``None`` is accepted.

    Returns:
        The path of the written file (always ``"report.pdf"`` in the current
        working directory, overwritten on every call).
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, 'CommentSurvey AI Report', 0, 1, 'C')
    pdf.ln(10)
    pdf.set_font('Arial', 'B', 12)
    # Sanitise the question exactly like the summary: clean_text drops the
    # characters outside Latin-1 that the built-in font cannot encode.
    question = clean_text(str(poll_question)) if poll_question else ""
    pdf.cell(0, 10, f"Question: {question[:60]}", 0, 1, 'L')
    pdf.set_font('Arial', '', 11)
    # A null/empty summary falls back to the same placeholder as a missing one.
    summary = analysis_result.get('summary') or 'N/A'
    pdf.multi_cell(0, 7, clean_text(str(summary)))
    # NOTE(review): a fixed file name is shared by all requests — confirm this
    # is acceptable if the app serves several users at once.
    path = "report.pdf"
    pdf.output(path)
    return path
# ====================== LOGIC ======================
def _as_number(value):
    """Coerce a model-supplied value to float; null or unparsable values become 0.0."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _as_dict(value):
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def analyze(url):
    """Run the full pipeline for one YouTube URL (Gradio click handler).

    Steps: fetch comments, analyse them with Groq, build the poll pie chart
    and sentiment bar chart, tabulate the labelled comments and write the PDF.

    Args:
        url: YouTube URL typed by the user.

    Returns:
        A 6-tuple ``(dataframe, status, poll_figure, sentiment_figure,
        summary_markdown, pdf_path)``. On failure every element except the
        status message is ``None``. ``pdf_path`` alone is ``None`` when only
        the PDF step failed.
    """
    if not GROQ_API_KEY:
        return None, "❌ API Key Missing in Hugging Face Secrets", None, None, None, None
    comments = fetch_youtube_comments(url)
    if not comments:
        return None, "❌ Failed to fetch comments.", None, None, None, None
    result = analyze_comments_with_groq(comments)
    if not result:
        return None, "❌ AI Analysis failed.", None, None, None, None

    # The model may send null (or a wrong type) for any field; never let that
    # raise after a successful API call.
    main = _as_dict(result.get('main_poll'))

    def total(*keys):
        return sum(_as_number(main.get(key)) for key in keys)

    poll_values = [
        total('yes_count', 'agree_count', 'support_count'),
        total('no_count', 'disagree_count', 'oppose_count'),
        total('neutral_count'),
    ]
    fig_poll = px.pie(names=['Yes/Agree/Support', 'No/Disagree/Oppose', 'Neutral'],
                      values=poll_values, title="Poll Distribution", hole=0.4)

    sent = _as_dict(result.get('sentiment'))
    fig_sent = px.bar(x=['Positive', 'Negative', 'Neutral'],
                      y=[_as_number(sent.get('positive')),
                         _as_number(sent.get('negative')),
                         _as_number(sent.get('neutral'))],
                      title="Sentiment Score", color=['Positive', 'Negative', 'Neutral'])

    df = pd.DataFrame(result.get('labeled_comments') or [])

    question = str(main.get('question') or 'Analysis')
    status = "✅ Analysis Complete"
    try:
        pdf_path = create_pdf_report(result, question)
    except Exception as e:  # the report is optional; keep the analysis visible
        print(f"PDF error: {e}")
        pdf_path = None
        status = "✅ Analysis Complete (PDF report unavailable)"

    summary_md = f"### 📊 {question}\n{result.get('summary') or ''}"
    return df, status, fig_poll, fig_sent, summary_md, pdf_path
# ====================== UI ======================
# NOTE(review): the layout nesting below is reconstructed from the order of
# the context managers — confirm against the deployed app.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📊 CommentSurvey AI")
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
        btn = gr.Button("Analyze", variant="primary")
    status = gr.Markdown("Status: Ready")
    with gr.Tabs():
        with gr.Tab("Summary"):
            sum_md = gr.Markdown()
            with gr.Row():
                p1 = gr.Plot()
                p2 = gr.Plot()
        with gr.Tab("Data"):
            table = gr.Dataframe()
    report_file = gr.File(label="Download PDF Report")
    # The outputs are listed in the same order as the 6-tuple analyze() returns.
    btn.click(analyze, inputs=[url_input], outputs=[table, status, p1, p2, sum_md, report_file])

if __name__ == "__main__":
    demo.launch()