Spaces:
Sleeping
Sleeping
| """ | |
| Gradio app: Instagram Comments Downloader → XLSX | |
| - Accepts single or multiple post URLs | |
| - Resolves Media ID automatically | |
| - Fetches comments using official Instagram Graph API | |
| - Provides XLSX files for download | |
| - Shows estimated API calls (credit usage) | |
| """ | |
| import os | |
| import requests | |
| import pandas as pd | |
| import gradio as gr | |
| from io import BytesIO | |
| from urllib.parse import urlparse | |
# The Instagram Graph API token is supplied through the Hugging Face Space
# secret named INSTAGRAM_API_TOKEN. Fail at import time when it is missing or
# empty, because both API helpers in this file send it with their requests.
ACCESS_TOKEN = os.getenv("INSTAGRAM_API_TOKEN")
if ACCESS_TOKEN is None or ACCESS_TOKEN == "":
    raise ValueError("Set the Instagram API token in Space secrets as INSTAGRAM_API_TOKEN")
def resolve_shortcode_to_media_id(shortcode: str, timeout: float = 10.0):
    """Resolve an Instagram post shortcode to a media ID via the Graph API oEmbed endpoint.

    Args:
        shortcode: The post shortcode, i.e. the path segment that follows
            ``/p/`` in a post URL. Surrounding whitespace is ignored.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``media_id`` value taken from the oEmbed JSON response.

    Raises:
        ValueError: If the shortcode is empty, the request fails or times out,
            the endpoint answers with a non-200 status, or the response
            carries no ``media_id``.
    """
    shortcode = (shortcode or "").strip()
    if not shortcode:
        raise ValueError("Instagram shortcode must be a non-empty string")

    url = "https://graph.facebook.com/v19.0/instagram_oembed"
    params = {
        "url": f"https://www.instagram.com/p/{shortcode}/",
        "access_token": ACCESS_TOKEN,
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        resp = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # The exception text can embed the full request URL, whose query
        # string carries the access token. Report only the exception type and
        # drop the original context so the token never reaches the UI.
        raise ValueError(
            f"Error resolving URL {shortcode}: request failed ({type(exc).__name__})"
        ) from None

    if resp.status_code != 200:
        raise ValueError(f"Error resolving URL {shortcode}: {resp.status_code} {resp.text}")

    data = resp.json()
    media_id = data.get("media_id")
    if not media_id:
        raise ValueError(f"Could not resolve media ID for shortcode {shortcode}")
    return media_id
def fetch_instagram_comments(media_id: str, max_comments: int = 200, timeout: float = 10.0):
    """Fetch comments for a single post from the Graph API ``/{media_id}/comments`` edge.

    Pages are followed through the ``paging.next`` URL of each response until
    either no further page is advertised or ``max_comments`` rows have been
    collected.

    Args:
        media_id: Media ID of the post whose comments are requested.
        max_comments: Upper bound on the number of comments returned. Zero or
            a negative value yields an empty result without any request.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        A ``(DataFrame, api_calls)`` tuple. The frame has the columns ``id``,
        ``username``, ``text`` and ``timestamp``; ``api_calls`` is the number
        of HTTP responses received.

    Raises:
        ValueError: If a request fails or times out, or a response has a
            non-200 status.
    """
    columns = ["id", "username", "text", "timestamp"]
    if max_comments <= 0:
        return pd.DataFrame([], columns=columns), 0

    comments = []
    api_calls = 0
    next_url = f"https://graph.facebook.com/v19.0/{media_id}/comments"
    params = {
        "fields": ",".join(columns),
        "limit": 100,
        "access_token": ACCESS_TOKEN,
    }

    while next_url and len(comments) < max_comments:
        try:
            resp = requests.get(next_url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            # The exception text can embed the request URL, including the
            # access token in its query string, so only the type is reported.
            raise ValueError(
                f"Error fetching comments: request failed ({type(exc).__name__})"
            ) from None
        api_calls += 1
        if resp.status_code != 200:
            raise ValueError(f"Error fetching comments: {resp.status_code} {resp.text}")

        data = resp.json()
        remaining = max_comments - len(comments)
        comments.extend(
            {col: item.get(col) for col in columns}
            for item in data.get("data", [])[:remaining]
        )

        next_url = data.get("paging", {}).get("next")
        # The "next" URL is complete (fields, limit, token and cursor are
        # already in its query string); sending params again would append
        # duplicate query parameters to every follow-up request.
        params = None

    return pd.DataFrame(comments, columns=columns), api_calls
def _extract_shortcode(url):
    """Return the shortcode of a ``/p/<shortcode>`` post URL, or None if there is none."""
    # A link pasted without a scheme makes urlparse put the host into the
    # path. Try the text exactly as given first, so inputs that already parsed
    # keep working, then retry with a scheme prepended.
    candidates = [url]
    if "://" not in url:
        candidates.append(f"https://{url}")
    for candidate in candidates:
        parts = urlparse(candidate).path.strip("/").split("/")
        if len(parts) >= 2 and parts[0] == "p" and parts[1]:
            return parts[1]
    return None


def process_instagram_urls(urls_text, max_comments=200):
    """Download the comments of every listed post into an in-memory XLSX workbook.

    Args:
        urls_text: Post URLs separated by newlines and/or commas. ``None`` is
            treated like an empty string.
        max_comments: Maximum number of comments fetched per post.

    Returns:
        A ``(files, message)`` tuple. On success ``files`` is a list of
        ``(filename, BytesIO)`` pairs, one per URL, and ``message`` reports
        the number of comment-fetch API calls. On the first invalid URL or
        failure, processing stops and ``([], <error message>)`` is returned.
    """
    raw = (urls_text or "").replace(",", "\n")
    urls = [u.strip() for u in raw.split("\n") if u.strip()]

    files = []
    total_api_calls = 0
    for url in urls:
        try:
            shortcode = _extract_shortcode(url)
            if shortcode is None:
                return [], f"Invalid Instagram post URL: {url}"

            media_id = resolve_shortcode_to_media_id(shortcode)
            df, api_calls = fetch_instagram_comments(media_id, max_comments)
            total_api_calls += api_calls

            output = BytesIO()
            df.to_excel(output, index=False, engine='openpyxl')
            output.seek(0)  # rewind so consumers read from the start
            files.append((f"instagram_{shortcode}_comments.xlsx", output))
        except Exception as e:
            # UI boundary: turn any failure into a status message rather than
            # letting it propagate to the caller.
            return [], f"Error processing {url}: {e}"

    return files, f"Estimated API calls used: {total_api_calls}"
# --- Gradio UI ---
with gr.Blocks(title="Instagram Comments Downloader") as demo:
    gr.Markdown(
        "## Instagram Comments → XLSX\n"
        "Paste one or multiple Instagram post URLs (newline or comma separated) and fetch comments."
    )
    urls_input = gr.Textbox(
        label="Instagram Post URLs",
        lines=5,
        placeholder="https://www.instagram.com/p/CODE1/\nhttps://www.instagram.com/p/CODE2/"
    )
    max_comments_input = gr.Number(value=200, label="Max comments per post")
    run_btn = gr.Button("Fetch Comments")
    download_files = gr.Files(label="Download XLSX files", file_types=[".xlsx"])
    api_info = gr.Textbox(label="Estimated API Calls / Status", interactive=False)

    def on_fetch(urls_text, max_comments):
        """Handle the button click: fetch comments and return file paths plus a status line.

        The file output component is given filesystem paths, so the in-memory
        workbooks produced by ``process_instagram_urls`` are written to a
        fresh temporary directory first.
        """
        import tempfile  # local import: only this handler needs it

        # A cleared Number field arrives as None; fall back to the UI default.
        limit = 200 if max_comments is None else int(max_comments)
        files, status = process_instagram_urls(urls_text, limit)

        paths = []
        if files:
            out_dir = tempfile.mkdtemp(prefix="instagram_comments_")
            for filename, buffer in files:
                path = os.path.join(out_dir, os.path.basename(filename))
                with open(path, "wb") as fh:
                    fh.write(buffer.getvalue())
                paths.append(path)
        return paths, status

    run_btn.click(
        fn=on_fetch,
        inputs=[urls_input, max_comments_input],
        outputs=[download_files, api_info]
    )

if __name__ == "__main__":
    demo.launch()