""" Gradio app: Instagram Comments Downloader → XLSX - Accepts single or multiple post URLs - Resolves Media ID automatically - Fetches comments using official Instagram Graph API - Provides XLSX files for download - Shows estimated API calls (credit usage) """ import os import requests import pandas as pd import gradio as gr from io import BytesIO from urllib.parse import urlparse # Instagram API token stored as HF Space secret ACCESS_TOKEN = os.environ.get("INSTAGRAM_API_TOKEN") if not ACCESS_TOKEN: raise ValueError("Set the Instagram API token in Space secrets as INSTAGRAM_API_TOKEN") def resolve_shortcode_to_media_id(shortcode: str): """Resolve Instagram post shortcode to media ID via Graph API OEmbed""" url = "https://graph.facebook.com/v19.0/instagram_oembed" params = { "url": f"https://www.instagram.com/p/{shortcode}/", "access_token": ACCESS_TOKEN } resp = requests.get(url, params=params) if resp.status_code != 200: raise ValueError(f"Error resolving URL {shortcode}: {resp.status_code} {resp.text}") data = resp.json() media_id = data.get("media_id") if not media_id: raise ValueError(f"Could not resolve media ID for shortcode {shortcode}") return media_id def fetch_instagram_comments(media_id: str, max_comments: int = 200): """Fetch comments for a single post; returns DataFrame and API calls used""" columns = ["id", "username", "text", "timestamp"] comments_list = [] count = 0 api_calls = 0 url = f"https://graph.facebook.com/v19.0/{media_id}/comments" params = {"fields": "id,username,text,timestamp", "limit": 100, "access_token": ACCESS_TOKEN} next_url = url while next_url and count < max_comments: resp = requests.get(next_url, params=params) api_calls += 1 if resp.status_code != 200: raise ValueError(f"Error fetching comments: {resp.status_code} {resp.text}") data = resp.json() for c in data.get("data", []): comments_list.append({ "id": c.get("id"), "username": c.get("username"), "text": c.get("text"), "timestamp": c.get("timestamp") }) count += 1 if count >= max_comments: break next_url = data.get("paging", {}).get("next") df = pd.DataFrame(comments_list, columns=columns) return df, api_calls def process_instagram_urls(urls_text, max_comments=200): """Process multiple URLs, return list of (filename, BytesIO) and total API calls""" urls = [u.strip() for u in urls_text.replace(",", "\n").split("\n") if u.strip()] files = [] total_api_calls = 0 for url in urls: try: parsed = urlparse(url) path_parts = parsed.path.strip("/").split("/") if len(path_parts) < 2 or path_parts[0] != "p": return [], f"Invalid Instagram post URL: {url}" shortcode = path_parts[1] media_id = resolve_shortcode_to_media_id(shortcode) df, api_calls = fetch_instagram_comments(media_id, max_comments) total_api_calls += api_calls output = BytesIO() df.to_excel(output, index=False, engine='openpyxl') output.seek(0) filename = f"instagram_{shortcode}_comments.xlsx" files.append((filename, output)) except Exception as e: return [], f"Error processing {url}: {e}" return files, f"Estimated API calls used: {total_api_calls}" # --- Gradio UI --- with gr.Blocks(title="Instagram Comments Downloader") as demo: gr.Markdown( "## Instagram Comments → XLSX\n" "Paste one or multiple Instagram post URLs (newline or comma separated) and fetch comments." ) urls_input = gr.Textbox( label="Instagram Post URLs", lines=5, placeholder="https://www.instagram.com/p/CODE1/\nhttps://www.instagram.com/p/CODE2/" ) max_comments_input = gr.Number(value=200, label="Max comments per post") run_btn = gr.Button("Fetch Comments") download_files = gr.Files(label="Download XLSX files", file_types=[".xlsx"]) api_info = gr.Textbox(label="Estimated API Calls / Status", interactive=False) def on_fetch(urls_text, max_comments): files, api_estimate = process_instagram_urls(urls_text, int(max_comments)) return files, api_estimate run_btn.click( fn=on_fetch, inputs=[urls_input, max_comments_input], outputs=[download_files, api_info] ) if __name__ == "__main__": demo.launch()