# CommentSurveyAI / app.py
# abid-ai's picture
# Update app.py
# 3f373ed verified
import gradio as gr
import json
import plotly.express as px
import pandas as pd
from groq import Groq
from fpdf import FPDF
from youtube_comment_downloader import YoutubeCommentDownloader
import re
import os
import warnings
warnings.filterwarnings("ignore")
# ====================== CONFIG ======================
# On Hugging Face, set 'GROQ_API_KEY' in the "Variables and Secrets" settings tab
# Groq API key read from the environment. os.getenv returns None when the
# variable is unset; the code that talks to Groq checks for that and returns
# early instead of attempting a request.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# ====================== SYSTEM PROMPT ======================
# System message sent with every Groq chat completion. It instructs the model
# to answer with a single JSON object containing the keys "main_poll",
# "sentiment", "top_themes", "summary" and "labeled_comments"; the rest of the
# app reads those keys from the parsed response.
SYSTEM_PROMPT = """
You are an expert social media sentiment and poll analysis AI.
Focus on Yes/No, Agree/Disagree, Support/Oppose, and sentiment.
Handle English + Urdu + Hindi + other languages well.
Return ONLY valid JSON in this exact format:
{
"main_poll": {
"question": "Suggested poll question",
"yes_count": int,
"no_count": int,
"agree_count": int,
"disagree_count": int,
"support_count": int,
"oppose_count": int,
"neutral_count": int
},
"sentiment": {
"positive": float,
"negative": float,
"neutral": float
},
"top_themes": ["theme1", "theme2"],
"summary": "Short professional summary",
"labeled_comments": [
{"comment": "...", "opinion": "Yes|No|Agree|Disagree|Positive|Negative|Neutral|Mixed"}
]
}
"""
# ====================== HELPERS ======================
# Translation table used by clean_text: typographic characters are mapped to
# ASCII look-alikes and non-printable control characters are deleted (tab,
# newline and carriage return are intentionally kept).
_CLEAN_TEXT_TABLE = {
    **dict.fromkeys(map(ord, "\u2022\u2023\u25CF\u25BA\u25C4"), "-"),  # bullets
    **dict.fromkeys(map(ord, "\u2013\u2014"), "-"),  # en/em dash
    **dict.fromkeys(map(ord, "\u2018\u2019"), "'"),  # single curly quotes
    **dict.fromkeys(map(ord, "\u201C\u201D"), '"'),  # double curly quotes
    **dict.fromkeys(
        [*range(0x00, 0x09), 0x0B, 0x0C, *range(0x0E, 0x20), 0x7F], None
    ),
}


def clean_text(text):
    """Return *text* reduced to characters a Latin-1 PDF font can render.

    Bullets and dashes become ``-``, curly quotes become their straight ASCII
    counterparts (single stays single, double stays double), control
    characters are removed, and anything that still cannot be encoded as
    Latin-1 is dropped.

    Args:
        text: Value to sanitise. Falsy values (``None``, ``""``) yield ``""``;
            non-string values are converted with ``str()`` first.

    Returns:
        The sanitised string, possibly empty.
    """
    if not text:
        return ""
    normalized = str(text).translate(_CLEAN_TEXT_TABLE)
    # 'ignore' silently drops characters outside Latin-1 (e.g. emoji or
    # non-Latin scripts) so the PDF writer never sees them.
    return normalized.encode('latin-1', 'ignore').decode('latin-1')
def extract_youtube_id(url):
    """Return the video ID found in a YouTube URL, or None.

    Recognised URL shapes, tried in this order: ``youtu.be/<id>``,
    ``...v=<id>``, ``/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>``.

    Args:
        url: The URL text to inspect. ``None``, empty or non-string values
            are treated as "no ID" instead of raising.

    Returns:
        The first matching ID (letters, digits, ``_`` and ``-``), or ``None``
        when nothing matches.
    """
    if not url or not isinstance(url, str):
        return None
    patterns = (
        r'youtu\.be/([a-zA-Z0-9_-]+)',
        r'v=([a-zA-Z0-9_-]+)',
        r'/embed/([a-zA-Z0-9_-]+)',
        r'/shorts/([a-zA-Z0-9_-]+)',
        r'/live/([a-zA-Z0-9_-]+)',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
def fetch_youtube_comments(url, limit=100):
    """Download up to *limit* comment texts for the video behind *url*.

    Args:
        url: A YouTube URL understood by ``extract_youtube_id``.
        limit: Maximum number of comments to collect. Zero or a negative
            value yields an empty list.

    Returns:
        A list of comment strings. The list is empty when the URL contains no
        video ID, when ``limit`` is not positive, or when downloading fails
        (the error is printed, not raised).
    """
    try:
        # Without this guard the loop below would still collect one comment
        # before noticing that the limit had been reached.
        if limit <= 0:
            return []
        video_id = extract_youtube_id(url)
        if not video_id:
            return []
        downloader = YoutubeCommentDownloader()
        comments = []
        # youtube-comment-downloader: sort_by=0 is popular ("Top"),
        # sort_by=1 is recent ("Newest"). Top comments are requested here.
        gen = downloader.get_comments(video_id, sort_by=0)
        for comment in gen:
            text = comment.get('text')
            if not text:
                # Skip entries without usable text instead of failing the
                # whole download on a single malformed item.
                continue
            comments.append(text)
            if len(comments) >= limit:
                break
        return comments
    except Exception as e:
        # Best effort: any scraping/network failure is reported as "no comments".
        print(f"Fetch error: {e}")
        return []
def analyze_comments_with_groq(comments, post_context=""):
    """Ask the Groq chat model to analyse *comments* and return its JSON reply.

    Each comment is numbered (``C1``, ``C2``, ...), stripped of control
    characters and truncated to 200 characters before being sent together
    with ``SYSTEM_PROMPT``. Non-Latin scripts are deliberately preserved so
    the model can analyse Urdu, Hindi and other languages; consequently the
    returned text may contain characters outside Latin-1, and callers that
    render it with Latin-1-only fonts must sanitise it themselves.

    Args:
        comments: Iterable of comment strings.
        post_context: Optional free-text context prepended to the request.

    Returns:
        The parsed JSON object as a dict, or ``None`` when the API key is
        missing, there are no comments, the request fails, or the reply is
        not a JSON object.
    """
    if not GROQ_API_KEY or not comments:
        return None
    try:
        client = Groq(api_key=GROQ_API_KEY)
        numbered = []
        for index, comment in enumerate(comments, start=1):
            # Remove only non-printable control characters; a Latin-1 filter
            # here would erase every non-Latin comment before analysis.
            text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', str(comment or ""))
            numbered.append(f"C{index}: {text[:200]}")
        comments_text = "\n\n".join(numbered)
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": f"Context: {post_context}\n\nComments:\n{comments_text}"}
            ],
            temperature=0.2,
            response_format={"type": "json_object"}
        )
        parsed = json.loads(response.choices[0].message.content)
        # Callers use dict methods on the result; reject anything else.
        return parsed if isinstance(parsed, dict) else None
    except Exception as e:
        print(f"Groq Error: {e}")
        return None
def create_pdf_report(analysis_result, poll_question):
    """Write a one-page PDF report and return the path of the created file.

    The report contains a fixed title, the poll question (at most 60
    characters) and the analysis summary. Both texts are passed through
    ``clean_text`` because the built-in Arial font only covers Latin-1.

    Args:
        analysis_result: Dict holding an optional ``'summary'`` entry;
            ``None`` is treated as an empty dict.
        poll_question: Question text shown in the report; ``None`` is treated
            as an empty string.

    Returns:
        Path of a newly created ``.pdf`` file in the system temp directory.
        Each call gets its own file so concurrent requests cannot overwrite
        one another's report. The file is not deleted by this function.
    """
    import tempfile

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, 'CommentSurvey AI Report', 0, 1, 'C')
    pdf.ln(10)
    pdf.set_font('Arial', 'B', 12)
    # Sanitise before truncating so the 60 characters are all printable.
    question = clean_text(str(poll_question or ""))[:60]
    pdf.cell(0, 10, f"Question: {question}", 0, 1, 'L')
    pdf.set_font('Arial', '', 11)
    summary = (analysis_result or {}).get('summary') or 'N/A'
    pdf.multi_cell(0, 7, clean_text(str(summary)))
    # Reserve a unique file name, close the handle, then let FPDF write to it.
    with tempfile.NamedTemporaryFile(prefix="report_", suffix=".pdf", delete=False) as handle:
        path = handle.name
    pdf.output(path)
    return path
# ====================== LOGIC ======================
def _as_number(value):
    """Return *value* as a number, or 0 when it cannot be read as one."""
    if isinstance(value, (int, float)):
        return value
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0


def analyze(url):
    """Run the full pipeline for one YouTube URL and build the UI outputs.

    Steps: fetch comments, send them to the Groq model, then turn the reply
    into a table, two Plotly figures, a Markdown summary and a PDF report.

    Args:
        url: YouTube video URL entered by the user.

    Returns:
        A 6-tuple ``(dataframe, status, poll_figure, sentiment_figure,
        summary_markdown, pdf_path)``. On failure every element except the
        status message is ``None``.
    """
    if not GROQ_API_KEY:
        return None, "❌ API Key Missing in Hugging Face Secrets", None, None, None, None

    comments = fetch_youtube_comments(url)
    if not comments:
        return None, "❌ Failed to fetch comments.", None, None, None, None

    result = analyze_comments_with_groq(comments)
    if not result or not isinstance(result, dict):
        return None, "❌ AI Analysis failed.", None, None, None, None

    # The model may return null or a wrong type for any section; fall back to
    # empty containers so the lookups below cannot raise.
    main = result.get('main_poll')
    if not isinstance(main, dict):
        main = {}
    sent = result.get('sentiment')
    if not isinstance(sent, dict):
        sent = {}

    def count(*keys):
        # Sum several poll counters, tolerating missing/null/string values.
        return sum(_as_number(main.get(key, 0)) for key in keys)

    poll_values = [
        count('yes_count', 'agree_count', 'support_count'),
        count('no_count', 'disagree_count', 'oppose_count'),
        count('neutral_count'),
    ]
    fig_poll = px.pie(names=['Yes/Agree/Support', 'No/Disagree/Oppose', 'Neutral'],
                      values=poll_values, title="Poll Distribution", hole=0.4)

    fig_sent = px.bar(x=['Positive', 'Negative', 'Neutral'],
                      y=[_as_number(sent.get('positive', 0)),
                         _as_number(sent.get('negative', 0)),
                         _as_number(sent.get('neutral', 0))],
                      title="Sentiment Score", color=['Positive', 'Negative', 'Neutral'])

    df = pd.DataFrame(result.get('labeled_comments') or [])
    question = main.get('question') or 'Analysis'
    pdf_path = create_pdf_report(result, question)
    summary_md = f"### 📝 {question}\n{result.get('summary') or ''}"
    return df, "✅ Analysis Complete", fig_poll, fig_sent, summary_md, pdf_path
# ====================== UI ======================
# Gradio layout: URL input and button on top, a status line, then tabs with
# the summary/plots and the labelled-comment table, plus the PDF download.
# NOTE(review): the nesting below was reconstructed from a flattened source;
# confirm that report_file belongs outside the tabs.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📊 CommentSurvey AI")
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
        btn = gr.Button("Analyze", variant="primary")
    status = gr.Markdown("Status: Ready")
    with gr.Tabs():
        with gr.Tab("Summary"):
            sum_md = gr.Markdown()
            with gr.Row():
                p1 = gr.Plot()
                p2 = gr.Plot()
        with gr.Tab("Data"):
            table = gr.Dataframe()
    report_file = gr.File(label="Download PDF Report")
    # The output order must match the 6-tuple returned by analyze().
    btn.click(analyze, inputs=[url_input], outputs=[table, status, p1, p2, sum_md, report_file])

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()