Spaces:

abid-ai
/

CommentSurveyAI

Sleeping

App Files Files Community

CommentSurveyAI / app.py

abid-ai

Update app.py

3f373ed verified 19 days ago

raw

history blame contribute delete

6.19 kB

	import gradio as gr
	import json
	import plotly.express as px
	import pandas as pd
	from groq import Groq
	from fpdf import FPDF
	from youtube_comment_downloader import YoutubeCommentDownloader
	import re
	import os
	import warnings

	# Install a global "ignore" filter so no warning is reported at runtime.
	warnings.filterwarnings(action="ignore")

	# ====================== CONFIG ======================
	# The API key is read from the environment. On Hugging Face, define
	# 'GROQ_API_KEY' in the "Variables and Secrets" settings tab.
	# The value is None when the variable is not set.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

	# ====================== SYSTEM PROMPT ======================
	# System message for the chat-completion request. It pins the exact JSON
	# schema the rest of the app reads (main_poll, sentiment, top_themes,
	# summary, labeled_comments).
	# The opinion labels are separated by plain '|' characters: a backslash
	# before '|' is an invalid escape sequence in a normal string literal and
	# would put literal backslashes into the text sent to the model.
	SYSTEM_PROMPT = """
	You are an expert social media sentiment and poll analysis AI.
	Focus on Yes/No, Agree/Disagree, Support/Oppose, and sentiment.

	Handle English + Urdu + Hindi + other languages well.
	Return ONLY valid JSON in this exact format:
	{
	"main_poll": {
	"question": "Suggested poll question",
	"yes_count": int,
	"no_count": int,
	"agree_count": int,
	"disagree_count": int,
	"support_count": int,
	"oppose_count": int,
	"neutral_count": int
	},
	"sentiment": {
	"positive": float,
	"negative": float,
	"neutral": float
	},
	"top_themes": ["theme1", "theme2"],
	"summary": "Short professional summary",
	"labeled_comments": [
	{"comment": "...", "opinion": "Yes|No|Agree|Disagree|Positive|Negative|Neutral|Mixed"}
	]
	}
	"""

	# ====================== HELPERS ======================
	def clean_text(text):
	    """Reduce *text* to plain Latin-1 so it can be written into the PDF report.

	    Typographic bullets and dashes become '-', curly quotes become their
	    straight ASCII equivalents, control characters (except tab, newline and
	    carriage return) are removed, and any character that cannot be encoded
	    as Latin-1 is dropped.

	    Args:
	        text: The string to sanitise. Any falsy value (None, "") yields "".

	    Returns:
	        The sanitised string, containing only Latin-1 characters.
	    """
	    if not text:
	        return ""
	    text = re.sub(r'[\u2022\u2023\u25CF\u25BA\u25C4]', '-', text)
	    # Curly single quotes double as apostrophes, so they must map to "'"
	    # rather than to a double quote (otherwise "don’t" turns into 'don"t').
	    text = re.sub(r'[\u2018\u2019]', "'", text)
	    text = re.sub(r'[\u201C\u201D]', '"', text)
	    text = re.sub(r'[\u2013\u2014]', '-', text)
	    text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', text)
	    # Ensure PDF compatibility: silently drop everything outside Latin-1.
	    return text.encode('latin-1', 'ignore').decode('latin-1')

	def extract_youtube_id(url):
	    """Return the video ID contained in a YouTube URL.

	    Recognised forms are ``youtu.be/<id>``, a ``v=<id>`` query parameter,
	    ``/embed/<id>``, ``/shorts/<id>`` and ``/live/<id>``. Patterns are tried
	    in that order and the first match wins.

	    Args:
	        url: The URL (or URL fragment) to inspect.

	    Returns:
	        The ID string, or None when *url* is empty, not a string, or matches
	        none of the recognised forms.
	    """
	    if not url or not isinstance(url, str):
	        return None
	    patterns = (
	        r'youtu\.be/([a-zA-Z0-9_-]+)',
	        # Anchor on the start of the string, '?' or '&' so that only a real
	        # ``v`` parameter matches, not one whose name merely ends in "v"
	        # (e.g. ``rev=1``).
	        r'(?:^|[?&])v=([a-zA-Z0-9_-]+)',
	        r'/embed/([a-zA-Z0-9_-]+)',
	        r'/shorts/([a-zA-Z0-9_-]+)',
	        r'/live/([a-zA-Z0-9_-]+)',
	    )
	    for pattern in patterns:
	        match = re.search(pattern, url)
	        if match:
	            return match.group(1)
	    return None

	def fetch_youtube_comments(url, limit=100):
	    """Download up to *limit* comment texts for the video behind *url*.

	    Args:
	        url: A YouTube URL understood by ``extract_youtube_id``.
	        limit: Maximum number of comments to return. A value of zero or
	            less returns an empty list.

	    Returns:
	        A list of comment strings. It is empty when no video ID can be
	        extracted or nothing could be downloaded. If the download fails
	        part-way through, the comments collected so far are returned
	        instead of being thrown away.
	    """
	    comments = []
	    if not limit or limit <= 0:
	        return comments
	    try:
	        video_id = extract_youtube_id(url)
	        if not video_id:
	            return comments
	        downloader = YoutubeCommentDownloader()
	        # youtube_comment_downloader defines SORT_BY_POPULAR = 0 and
	        # SORT_BY_RECENT = 1, so 0 is what selects the "Top" comments.
	        stream = downloader.get_comments(video_id, sort_by=0)
	        for comment in stream:
	            # presumably each item is a dict with a 'text' key; entries
	            # without usable text are skipped rather than aborting the run.
	            text = comment.get('text')
	            if text:
	                comments.append(text)
	            if len(comments) >= limit:
	                break
	    except Exception as e:
	        # Best effort: log the problem and fall through with what we have.
	        print(f"Fetch error: {e}")
	    return comments

	def analyze_comments_with_groq(comments, post_context=""):
	    """Send the comments to the Groq chat API and return the parsed JSON reply.

	    Each comment is numbered (``C1``, ``C2``, ...), stripped of control
	    characters and truncated to 200 characters before being placed in the
	    user message. ``SYSTEM_PROMPT`` is sent as the system message.

	    Args:
	        comments: Iterable of comment strings.
	        post_context: Optional free-text context placed before the comments.

	    Returns:
	        The decoded JSON object from the model, or None when the API key is
	        missing, there are no comments, or the request/parsing fails.
	    """
	    if not GROQ_API_KEY:
	        return None
	    if not comments:
	        return None
	    try:
	        client = Groq(api_key=GROQ_API_KEY)
	        # Only control characters are removed here. clean_text() is not used
	        # because it drops every character outside Latin-1, which would erase
	        # Urdu/Hindi comments before the model ever sees them.
	        numbered = []
	        for index, comment in enumerate(comments, start=1):
	            text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', str(comment or ""))
	            numbered.append(f"C{index}: {text[:200]}")
	        comments_text = "\n\n".join(numbered)

	        response = client.chat.completions.create(
	            model="llama-3.3-70b-versatile",
	            messages=[
	                {"role": "system", "content": SYSTEM_PROMPT},
	                {"role": "user", "content": f"Context: {post_context}\n\nComments:\n{comments_text}"},
	            ],
	            temperature=0.2,
	            response_format={"type": "json_object"},
	        )
	        return json.loads(response.choices[0].message.content)
	    except Exception as e:
	        # Best effort: any API or JSON failure is logged and reported as None.
	        print(f"Groq Error: {e}")
	        return None

	def create_pdf_report(analysis_result, poll_question):
	    """Write a one-page PDF report and return its file path.

	    The page holds a fixed title, the poll question (first 60 characters)
	    and the summary text taken from *analysis_result*.

	    Args:
	        analysis_result: Mapping with an optional 'summary' entry.
	        poll_question: Question shown under the title; may be empty or None.

	    Returns:
	        The path of the written file, always "report.pdf".
	    """
	    pdf = FPDF()
	    pdf.add_page()
	    pdf.set_font('Arial', 'B', 16)
	    pdf.cell(0, 10, 'CommentSurvey AI Report', 0, 1, 'C')
	    pdf.ln(10)

	    pdf.set_font('Arial', 'B', 12)
	    # The question comes from the model and may contain characters outside
	    # Latin-1 (or be None); sanitise it like the summary so that writing the
	    # PDF with the built-in font does not fail on encoding.
	    question = clean_text(str(poll_question or ""))[:60]
	    pdf.cell(0, 10, f"Question: {question}", 0, 1, 'L')

	    pdf.set_font('Arial', '', 11)
	    # Treat a null/empty summary the same as a missing one.
	    summary = analysis_result.get('summary') or 'N/A'
	    pdf.multi_cell(0, 7, clean_text(str(summary)))

	    # NOTE(review): the path is fixed, so concurrent requests overwrite each
	    # other's report - confirm whether per-request files are needed.
	    path = "report.pdf"
	    pdf.output(path)
	    return path

	# ====================== LOGIC ======================
	def _failure(message):
	    """Build the six-slot result tuple for a failed run, carrying only *message*."""
	    return None, message, None, None, None, None

	def _as_number(value):
	    """Return *value* as a number, mapping null or non-numeric input to 0."""
	    if isinstance(value, (int, float)):
	        return value
	    try:
	        return float(value)
	    except (TypeError, ValueError):
	        return 0

	def analyze(url):
	    """Run the full pipeline for one YouTube URL.

	    Fetches comments, has them analysed, and builds the charts, table,
	    summary and PDF report from the result.

	    Args:
	        url: The YouTube URL entered by the user.

	    Returns:
	        A 6-tuple in the order expected by the click handler's outputs:
	        (comments DataFrame, status message, poll pie chart, sentiment bar
	        chart, summary markdown, PDF path). On failure every slot except
	        the status message is None.
	    """
	    if not GROQ_API_KEY:
	        return _failure("❌ API Key Missing in Hugging Face Secrets")

	    comments = fetch_youtube_comments((url or "").strip())
	    if not comments:
	        return _failure("❌ Failed to fetch comments.")

	    result = analyze_comments_with_groq(comments)
	    if not result or not isinstance(result, dict):
	        return _failure("❌ AI Analysis failed.")

	    # The model's JSON is untrusted: sections may be missing, null or of the
	    # wrong type, so normalise them before doing arithmetic on the values.
	    main = result.get('main_poll')
	    if not isinstance(main, dict):
	        main = {}
	    sent = result.get('sentiment')
	    if not isinstance(sent, dict):
	        sent = {}

	    def total(*keys):
	        return sum(_as_number(main.get(key)) for key in keys)

	    poll_values = [
	        total('yes_count', 'agree_count', 'support_count'),
	        total('no_count', 'disagree_count', 'oppose_count'),
	        total('neutral_count'),
	    ]

	    fig_poll = px.pie(names=['Yes/Agree/Support', 'No/Disagree/Oppose', 'Neutral'],
	                      values=poll_values, title="Poll Distribution", hole=0.4)

	    fig_sent = px.bar(x=['Positive', 'Negative', 'Neutral'],
	                      y=[_as_number(sent.get('positive')),
	                         _as_number(sent.get('negative')),
	                         _as_number(sent.get('neutral'))],
	                      title="Sentiment Score", color=['Positive', 'Negative', 'Neutral'])

	    labeled = result.get('labeled_comments')
	    df = pd.DataFrame(labeled if isinstance(labeled, list) else [])

	    question = main.get('question') or 'Analysis'
	    status = "✅ Analysis Complete"
	    try:
	        pdf_path = create_pdf_report(result, question)
	    except Exception as e:
	        # A broken report must not discard an otherwise successful analysis.
	        print(f"PDF error: {e}")
	        pdf_path = None
	        status = "✅ Analysis Complete (PDF report unavailable)"

	    summary_md = f"### 📝 {question}\n{result.get('summary') or ''}"

	    return df, status, fig_poll, fig_sent, summary_md, pdf_path

	# ====================== UI ======================
	# Build the interface. Components are created in the order they are shown.
	# NOTE(review): the nesting below is reconstructed from a paste that lost
	# its indentation - confirm the layout against the running app.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# 📊 CommentSurvey AI")

	    # Input row: the video URL next to the button that starts the analysis.
	    with gr.Row():
	        url_input = gr.Textbox(
	            label="YouTube URL",
	            placeholder="https://www.youtube.com/watch?v=...",
	        )
	        btn = gr.Button("Analyze", variant="primary")

	    status = gr.Markdown("Status: Ready")

	    with gr.Tabs():
	        # First tab: written summary above the two charts.
	        with gr.Tab("Summary"):
	            sum_md = gr.Markdown()
	            with gr.Row():
	                p1 = gr.Plot()
	                p2 = gr.Plot()
	        # Second tab: the labelled comments as a table.
	        with gr.Tab("Data"):
	            table = gr.Dataframe()

	    report_file = gr.File(label="Download PDF Report")

	    # The order of the outputs mirrors the tuple returned by analyze().
	    btn.click(
	        fn=analyze,
	        inputs=[url_input],
	        outputs=[table, status, p1, p2, sum_md, report_file],
	    )

	# Start the app only when this file is executed directly.
	if __name__ == "__main__":
	    demo.launch()