# instagram_test / app.py
# Source: Hugging Face Space (clementBE) — "Update app.py", commit 526a62f (verified)
"""
Gradio app: Instagram Comments Downloader → XLSX
- Accepts single or multiple post URLs
- Resolves Media ID automatically
- Fetches comments using official Instagram Graph API
- Provides XLSX files for download
- Shows estimated API calls (credit usage)
"""
import os
import requests
import pandas as pd
import gradio as gr
from io import BytesIO
from urllib.parse import urlparse
# Instagram API token stored as HF Space secret.
# Read once at import time; all API helpers below use this module-level value.
ACCESS_TOKEN = os.environ.get("INSTAGRAM_API_TOKEN")
# Fail fast at startup: every API call needs the token, so a missing secret
# should abort the app immediately rather than at the first user request.
if not ACCESS_TOKEN:
    raise ValueError("Set the Instagram API token in Space secrets as INSTAGRAM_API_TOKEN")
def resolve_shortcode_to_media_id(shortcode: str) -> str:
    """Resolve an Instagram post shortcode to a numeric media ID.

    Uses the Graph API oEmbed endpoint, which returns a ``media_id``
    field for resolvable public posts.

    Args:
        shortcode: The post shortcode (the ``CODE`` part of
            ``https://www.instagram.com/p/CODE/``).

    Returns:
        The media ID string, usable with the ``/{media-id}/comments`` edge.

    Raises:
        ValueError: If the API call fails or no media ID is returned.
    """
    url = "https://graph.facebook.com/v19.0/instagram_oembed"
    params = {
        "url": f"https://www.instagram.com/p/{shortcode}/",
        "access_token": ACCESS_TOKEN,
    }
    # A timeout keeps a stalled request from hanging the Gradio worker forever.
    resp = requests.get(url, params=params, timeout=30)
    if resp.status_code != 200:
        raise ValueError(f"Error resolving URL {shortcode}: {resp.status_code} {resp.text}")
    data = resp.json()
    media_id = data.get("media_id")
    if not media_id:
        raise ValueError(f"Could not resolve media ID for shortcode {shortcode}")
    return media_id
def fetch_instagram_comments(media_id: str, max_comments: int = 200):
    """Fetch up to ``max_comments`` comments for a single media object.

    Follows Graph API cursor pagination until the comment cap or the
    last page is reached.

    Args:
        media_id: Numeric media ID (see ``resolve_shortcode_to_media_id``).
        max_comments: Upper bound on the number of comments collected.

    Returns:
        Tuple of (DataFrame with columns id/username/text/timestamp,
        number of HTTP requests made).

    Raises:
        ValueError: If any API request returns a non-200 status.
    """
    columns = ["id", "username", "text", "timestamp"]
    comments_list = []
    api_calls = 0
    next_url = f"https://graph.facebook.com/v19.0/{media_id}/comments"
    # Send params only on the FIRST request: the "paging.next" URL already
    # embeds fields/limit/access_token plus the "after" cursor, so re-sending
    # them (as the original code did) duplicates every query parameter.
    params = {"fields": "id,username,text,timestamp", "limit": 100, "access_token": ACCESS_TOKEN}
    while next_url and len(comments_list) < max_comments:
        # Timeout keeps a stalled API call from hanging the worker.
        resp = requests.get(next_url, params=params, timeout=30)
        params = None  # subsequent pages carry their own query string
        api_calls += 1
        if resp.status_code != 200:
            raise ValueError(f"Error fetching comments: {resp.status_code} {resp.text}")
        data = resp.json()
        for c in data.get("data", []):
            comments_list.append({
                "id": c.get("id"),
                "username": c.get("username"),
                "text": c.get("text"),
                "timestamp": c.get("timestamp"),
            })
            if len(comments_list) >= max_comments:
                break
        next_url = data.get("paging", {}).get("next")
    # Explicit columns keep the XLSX header present even with zero comments.
    df = pd.DataFrame(comments_list, columns=columns)
    return df, api_calls
def process_instagram_urls(urls_text, max_comments=200):
    """Fetch comments for several posts and package them as XLSX files.

    Args:
        urls_text: Post URLs separated by newlines and/or commas.
        max_comments: Per-post comment cap passed to the fetcher.

    Returns:
        Tuple ``(files, status)`` where ``files`` is a list of
        ``(filename, BytesIO)`` pairs and ``status`` is a human-readable
        message. On any error ``files`` is empty and ``status`` describes
        the failure.
    """
    urls = [u.strip() for u in urls_text.replace(",", "\n").split("\n") if u.strip()]
    files = []
    total_api_calls = 0
    # Regular posts, reels and IGTV all use /<kind>/<shortcode>/ paths.
    valid_kinds = ("p", "reel", "tv")
    for url in urls:
        try:
            parsed = urlparse(url)
            path_parts = parsed.path.strip("/").split("/")
            if len(path_parts) < 2 or path_parts[0] not in valid_kinds:
                return [], f"Invalid Instagram post URL: {url}"
            shortcode = path_parts[1]
            media_id = resolve_shortcode_to_media_id(shortcode)
            df, api_calls = fetch_instagram_comments(media_id, max_comments)
            total_api_calls += api_calls
            output = BytesIO()
            df.to_excel(output, index=False, engine='openpyxl')
            output.seek(0)
            files.append((f"instagram_{shortcode}_comments.xlsx", output))
        except Exception as e:
            # Abort the whole batch: partial results could be mistaken
            # for a complete export.
            return [], f"Error processing {url}: {e}"
    return files, f"Estimated API calls used: {total_api_calls}"
# --- Gradio UI ---
with gr.Blocks(title="Instagram Comments Downloader") as demo:
    gr.Markdown(
        "## Instagram Comments → XLSX\n"
        "Paste one or multiple Instagram post URLs (newline or comma separated) and fetch comments."
    )
    urls_input = gr.Textbox(
        label="Instagram Post URLs",
        lines=5,
        placeholder="https://www.instagram.com/p/CODE1/\nhttps://www.instagram.com/p/CODE2/"
    )
    max_comments_input = gr.Number(value=200, label="Max comments per post")
    run_btn = gr.Button("Fetch Comments")
    download_files = gr.Files(label="Download XLSX files", file_types=[".xlsx"])
    api_info = gr.Textbox(label="Estimated API Calls / Status", interactive=False)

    def on_fetch(urls_text, max_comments):
        """Run the download pipeline and adapt results for gr.Files.

        gr.Files expects file paths, not (filename, BytesIO) tuples, so
        each in-memory workbook is written to a temp file and the paths
        are returned instead.
        """
        import tempfile

        results, api_estimate = process_instagram_urls(urls_text, int(max_comments))
        tmpdir = tempfile.mkdtemp(prefix="ig_comments_")
        paths = []
        for filename, buffer in results:
            path = os.path.join(tmpdir, filename)
            with open(path, "wb") as fh:
                fh.write(buffer.getvalue())
            paths.append(path)
        # On error, results is empty and api_estimate holds the message.
        return paths, api_estimate

    run_btn.click(
        fn=on_fetch,
        inputs=[urls_input, max_comments_input],
        outputs=[download_files, api_info]
    )

if __name__ == "__main__":
    demo.launch()