Spaces:
Sleeping
Sleeping
File size: 4,643 Bytes
dea18bb 3b64d7b 526a62f dea18bb c5aba08 dea18bb 814c168 dea18bb 814c168 6a9e71a 814c168 526a62f 814c168 6a9e71a 3b64d7b 526a62f 3b64d7b 6a9e71a 3b64d7b 6a9e71a 3b64d7b 6a9e71a 814c168 3b64d7b 814c168 3b64d7b 814c168 6a9e71a 814c168 3b64d7b 814c168 6a9e71a 814c168 6a9e71a 814c168 526a62f 3b64d7b 814c168 6a9e71a 3b64d7b 6a9e71a 3b64d7b 814c168 6a9e71a 526a62f 6a9e71a 3b64d7b 6a9e71a 526a62f 3b64d7b 814c168 6a9e71a 814c168 6a9e71a 3b64d7b 814c168 dea18bb 526a62f 6a9e71a 3b64d7b 9572f33 526a62f dea18bb 6a9e71a 3b64d7b 38e720f 3b64d7b 38e720f dea18bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
"""
Gradio app: Instagram Comments Downloader → XLSX
- Accepts single or multiple post URLs
- Resolves Media ID automatically
- Fetches comments using official Instagram Graph API
- Provides XLSX files for download
- Shows estimated API calls (credit usage)
"""
import os
import requests
import pandas as pd
import gradio as gr
from io import BytesIO
from urllib.parse import urlparse
# Instagram Graph API token, read from the HF Space secret INSTAGRAM_API_TOKEN.
# Surrounding whitespace is stripped because secrets pasted into a web form
# often carry a trailing newline, which would otherwise be sent as part of
# the token. A missing or blank value aborts start-up.
ACCESS_TOKEN = (os.environ.get("INSTAGRAM_API_TOKEN") or "").strip()
if not ACCESS_TOKEN:
    raise ValueError("Set the Instagram API token in Space secrets as INSTAGRAM_API_TOKEN")
def resolve_shortcode_to_media_id(shortcode: str):
    """Resolve an Instagram post shortcode to a media ID via the oEmbed endpoint.

    Builds the canonical ``https://www.instagram.com/p/<shortcode>/`` URL,
    queries the Graph API ``instagram_oembed`` endpoint with it, and reads
    the ``media_id`` field from the JSON response.

    Args:
        shortcode: The post shortcode taken from the post URL path.

    Returns:
        The value of the ``media_id`` field of the response.

    Raises:
        ValueError: If the request cannot be completed, the endpoint answers
            with a non-200 status, or the response has no ``media_id``.
    """
    endpoint = "https://graph.facebook.com/v19.0/instagram_oembed"
    params = {
        "url": f"https://www.instagram.com/p/{shortcode}/",
        "access_token": ACCESS_TOKEN,
    }
    try:
        # A timeout keeps an unresponsive endpoint from hanging the UI forever.
        resp = requests.get(endpoint, params=params, timeout=30)
    except requests.RequestException as exc:
        # requests' exception text embeds the full request URL, including the
        # access token, and callers show the message to the user. Report only
        # the exception type and drop the original context.
        raise ValueError(
            f"Error resolving URL {shortcode}: {type(exc).__name__}"
        ) from None

    if resp.status_code != 200:
        raise ValueError(f"Error resolving URL {shortcode}: {resp.status_code} {resp.text}")

    media_id = resp.json().get("media_id")
    if not media_id:
        raise ValueError(f"Could not resolve media ID for shortcode {shortcode}")
    return media_id
def fetch_instagram_comments(media_id: str, max_comments: int = 200):
    """Fetch up to ``max_comments`` comments for one post.

    Requests the ``/{media_id}/comments`` edge page by page, following the
    ``paging.next`` link of each response until it is absent or the comment
    limit has been reached.

    Args:
        media_id: Media ID of the post whose comments are fetched.
        max_comments: Upper bound on the number of comments collected. A value
            of zero or less results in no request being made.

    Returns:
        A ``(DataFrame, api_calls)`` tuple. The frame has the columns ``id``,
        ``username``, ``text`` and ``timestamp``; ``api_calls`` is the number
        of HTTP requests that received a response.

    Raises:
        ValueError: If a request cannot be completed or the API answers with
            a non-200 status.
    """
    columns = ["id", "username", "text", "timestamp"]
    rows = []
    api_calls = 0

    next_url = f"https://graph.facebook.com/v19.0/{media_id}/comments"
    # Query parameters are only needed for the first request: the paging
    # "next" link is followed as-is, and re-sending the parameters with it
    # would append duplicate fields/limit/access_token to its query string.
    params = {
        "fields": "id,username,text,timestamp",
        "limit": 100,
        "access_token": ACCESS_TOKEN,
    }

    while next_url and len(rows) < max_comments:
        try:
            # A timeout keeps an unresponsive endpoint from hanging the UI.
            resp = requests.get(next_url, params=params, timeout=30)
        except requests.RequestException as exc:
            # The exception text contains the request URL with the access
            # token; surface only the exception type.
            raise ValueError(
                f"Error fetching comments: {type(exc).__name__}"
            ) from None
        api_calls += 1
        params = None

        if resp.status_code != 200:
            raise ValueError(f"Error fetching comments: {resp.status_code} {resp.text}")

        data = resp.json()
        remaining = max_comments - len(rows)
        # Take at most the number of comments still missing from this page.
        for comment in data.get("data", [])[:remaining]:
            rows.append({key: comment.get(key) for key in columns})
        next_url = data.get("paging", {}).get("next")

    df = pd.DataFrame(rows, columns=columns)
    return df, api_calls
def process_instagram_urls(urls_text, max_comments=200):
    """Download the comments of every listed post into in-memory XLSX files.

    The input is split on newlines and commas. All URLs are validated before
    the first API request is made, so a malformed entry anywhere in the list
    is reported without spending API calls on the entries before it.

    Args:
        urls_text: Post URLs separated by newlines and/or commas. ``None`` is
            treated like an empty string.
        max_comments: Maximum number of comments fetched per post.

    Returns:
        A ``(files, status)`` tuple. On success ``files`` is a list of
        ``(filename, BytesIO)`` pairs, one per URL, and ``status`` reports the
        total number of API calls counted while fetching comments. On any
        failure ``files`` is an empty list and ``status`` describes the first
        problem encountered.
    """
    raw_entries = (urls_text or "").replace(",", "\n").split("\n")
    urls = [entry.strip() for entry in raw_entries if entry.strip()]

    # Pass 1: validate and extract shortcodes without touching the network.
    shortcodes = []
    for url in urls:
        try:
            path_parts = urlparse(url).path.strip("/").split("/")
        except ValueError:
            # urlparse rejects some malformed inputs (e.g. unbalanced "[").
            return [], f"Invalid Instagram post URL: {url}"
        # Expect a path of the form /p/<shortcode>/...
        if len(path_parts) < 2 or path_parts[0] != "p" or not path_parts[1]:
            return [], f"Invalid Instagram post URL: {url}"
        shortcodes.append(path_parts[1])

    # Pass 2: fetch the comments and build one workbook per post.
    files = []
    total_api_calls = 0
    for url, shortcode in zip(urls, shortcodes):
        try:
            media_id = resolve_shortcode_to_media_id(shortcode)
            df, api_calls = fetch_instagram_comments(media_id, max_comments)
            total_api_calls += api_calls

            output = BytesIO()
            df.to_excel(output, index=False, engine='openpyxl')
            output.seek(0)
            files.append((f"instagram_{shortcode}_comments.xlsx", output))
        except Exception as e:
            # UI boundary: turn any failure into a status message instead of
            # letting the exception propagate to the caller.
            return [], f"Error processing {url}: {e}"

    return files, f"Estimated API calls used: {total_api_calls}"
# --- Gradio UI ---
# Layout: URL box and per-post limit as inputs, a file list and a status box
# as outputs, wired to a single "Fetch Comments" button.
with gr.Blocks(title="Instagram Comments Downloader") as demo:
    gr.Markdown(
        "## Instagram Comments → XLSX\n"
        "Paste one or multiple Instagram post URLs (newline or comma separated) and fetch comments."
    )
    urls_input = gr.Textbox(
        label="Instagram Post URLs",
        lines=5,
        placeholder="https://www.instagram.com/p/CODE1/\nhttps://www.instagram.com/p/CODE2/"
    )
    max_comments_input = gr.Number(value=200, label="Max comments per post")
    run_btn = gr.Button("Fetch Comments")
    download_files = gr.Files(label="Download XLSX files", file_types=[".xlsx"])
    api_info = gr.Textbox(label="Estimated API Calls / Status", interactive=False)

    def on_fetch(urls_text, max_comments):
        """Handle a button click: fetch comments and hand file paths to the UI.

        Args:
            urls_text: Content of the URL text box.
            max_comments: Value of the number box; may be ``None`` when the
                box has been cleared.

        Returns:
            A ``(paths, status)`` tuple for the file list and the status box.
            ``paths`` is a list of XLSX file paths, empty on failure.
        """
        import tempfile  # local import: only this handler needs it

        try:
            limit = int(max_comments)
        except (TypeError, ValueError):
            return [], "Max comments per post must be a whole number"

        files, api_estimate = process_instagram_urls(urls_text, limit)

        # The file output component takes file paths, not in-memory buffers,
        # so each workbook is written into a fresh temporary directory under
        # its intended download name.
        out_dir = tempfile.mkdtemp(prefix="instagram_comments_")
        paths = []
        for filename, buffer in files:
            path = os.path.join(out_dir, filename)
            with open(path, "wb") as fh:
                fh.write(buffer.getvalue())
            paths.append(path)
        return paths, api_estimate

    run_btn.click(
        fn=on_fetch,
        inputs=[urls_input, max_comments_input],
        outputs=[download_files, api_info]
    )
# Start the Gradio server only when the file is executed as a script, so that
# importing the module does not launch the app.
if __name__ == "__main__":
    demo.launch()
|