# instagram_test / app.py
# Source: Hugging Face Space (clementBE) — "Update app.py", commit 526a62f (verified)
"""
Gradio app: Instagram Comments Downloader → XLSX
- Accepts single or multiple post URLs
- Resolves Media ID automatically
- Fetches comments using official Instagram Graph API
- Provides XLSX files for download
- Shows estimated API calls (credit usage)
"""
import os
import requests
import pandas as pd
import gradio as gr
from io import BytesIO
from urllib.parse import urlparse
# Instagram API token stored as HF Space secret.
# Read once at import time; all API helpers below use this module-level value.
ACCESS_TOKEN = os.environ.get("INSTAGRAM_API_TOKEN")
# Fail fast at startup: every API call needs the token, so a missing secret
# should abort the app immediately rather than at the first user request.
if not ACCESS_TOKEN:
    raise ValueError("Set the Instagram API token in Space secrets as INSTAGRAM_API_TOKEN")
def resolve_shortcode_to_media_id(shortcode: str) -> str:
    """Resolve an Instagram post shortcode to a numeric media ID.

    Uses the Graph API oEmbed endpoint, which returns a ``media_id``
    field for resolvable public posts.

    Args:
        shortcode: The post shortcode (the ``CODE`` part of
            ``https://www.instagram.com/p/CODE/``).

    Returns:
        The media ID string, usable with the ``/{media-id}/comments`` edge.

    Raises:
        ValueError: If the API call fails or no media ID is returned.
    """
    url = "https://graph.facebook.com/v19.0/instagram_oembed"
    params = {
        "url": f"https://www.instagram.com/p/{shortcode}/",
        "access_token": ACCESS_TOKEN,
    }
    # A timeout keeps a stalled request from hanging the Gradio worker forever.
    resp = requests.get(url, params=params, timeout=30)
    if resp.status_code != 200:
        raise ValueError(f"Error resolving URL {shortcode}: {resp.status_code} {resp.text}")
    data = resp.json()
    media_id = data.get("media_id")
    if not media_id:
        raise ValueError(f"Could not resolve media ID for shortcode {shortcode}")
    return media_id
def fetch_instagram_comments(media_id: str, max_comments: int = 200):
    """Fetch up to ``max_comments`` comments for a single media object.

    Follows Graph API cursor pagination until the comment cap or the
    last page is reached.

    Args:
        media_id: Numeric media ID (see ``resolve_shortcode_to_media_id``).
        max_comments: Upper bound on the number of comments collected.

    Returns:
        Tuple of (DataFrame with columns id/username/text/timestamp,
        number of HTTP requests made).

    Raises:
        ValueError: If any API request returns a non-200 status.
    """
    columns = ["id", "username", "text", "timestamp"]
    comments_list = []
    api_calls = 0
    next_url = f"https://graph.facebook.com/v19.0/{media_id}/comments"
    # Send params only on the FIRST request: the "paging.next" URL already
    # embeds fields/limit/access_token plus the "after" cursor, so re-sending
    # them (as the original code did) duplicates every query parameter.
    params = {"fields": "id,username,text,timestamp", "limit": 100, "access_token": ACCESS_TOKEN}
    while next_url and len(comments_list) < max_comments:
        # Timeout keeps a stalled API call from hanging the worker.
        resp = requests.get(next_url, params=params, timeout=30)
        params = None  # subsequent pages carry their own query string
        api_calls += 1
        if resp.status_code != 200:
            raise ValueError(f"Error fetching comments: {resp.status_code} {resp.text}")
        data = resp.json()
        for c in data.get("data", []):
            comments_list.append({
                "id": c.get("id"),
                "username": c.get("username"),
                "text": c.get("text"),
                "timestamp": c.get("timestamp"),
            })
            if len(comments_list) >= max_comments:
                break
        next_url = data.get("paging", {}).get("next")
    # Explicit columns keep the XLSX header present even with zero comments.
    df = pd.DataFrame(comments_list, columns=columns)
    return df, api_calls
def process_instagram_urls(urls_text, max_comments=200):
    """Fetch comments for several posts and package them as XLSX files.

    Args:
        urls_text: Post URLs separated by newlines and/or commas.
        max_comments: Per-post comment cap passed to the fetcher.

    Returns:
        Tuple ``(files, status)`` where ``files`` is a list of
        ``(filename, BytesIO)`` pairs and ``status`` is a human-readable
        message. On any error ``files`` is empty and ``status`` describes
        the failure.
    """
    urls = [u.strip() for u in urls_text.replace(",", "\n").split("\n") if u.strip()]
    files = []
    total_api_calls = 0
    # Regular posts, reels and IGTV all use /<kind>/<shortcode>/ paths.
    valid_kinds = ("p", "reel", "tv")
    for url in urls:
        try:
            parsed = urlparse(url)
            path_parts = parsed.path.strip("/").split("/")
            if len(path_parts) < 2 or path_parts[0] not in valid_kinds:
                return [], f"Invalid Instagram post URL: {url}"
            shortcode = path_parts[1]
            media_id = resolve_shortcode_to_media_id(shortcode)
            df, api_calls = fetch_instagram_comments(media_id, max_comments)
            total_api_calls += api_calls
            output = BytesIO()
            df.to_excel(output, index=False, engine='openpyxl')
            output.seek(0)
            files.append((f"instagram_{shortcode}_comments.xlsx", output))
        except Exception as e:
            # Abort the whole batch: partial results could be mistaken
            # for a complete export.
            return [], f"Error processing {url}: {e}"
    return files, f"Estimated API calls used: {total_api_calls}"
# --- Gradio UI ---
with gr.Blocks(title="Instagram Comments Downloader") as demo:
    gr.Markdown(
        "## Instagram Comments → XLSX\n"
        "Paste one or multiple Instagram post URLs (newline or comma separated) and fetch comments."
    )
    urls_input = gr.Textbox(
        label="Instagram Post URLs",
        lines=5,
        placeholder="https://www.instagram.com/p/CODE1/\nhttps://www.instagram.com/p/CODE2/"
    )
    max_comments_input = gr.Number(value=200, label="Max comments per post")
    run_btn = gr.Button("Fetch Comments")
    download_files = gr.Files(label="Download XLSX files", file_types=[".xlsx"])
    api_info = gr.Textbox(label="Estimated API Calls / Status", interactive=False)

    def on_fetch(urls_text, max_comments):
        """Run the download pipeline and adapt results for gr.Files.

        gr.Files expects file paths, not (filename, BytesIO) tuples, so
        each in-memory workbook is written to a temp file and the paths
        are returned instead.
        """
        import tempfile

        results, api_estimate = process_instagram_urls(urls_text, int(max_comments))
        tmpdir = tempfile.mkdtemp(prefix="ig_comments_")
        paths = []
        for filename, buffer in results:
            path = os.path.join(tmpdir, filename)
            with open(path, "wb") as fh:
                fh.write(buffer.getvalue())
            paths.append(path)
        # On error, results is empty and api_estimate holds the message.
        return paths, api_estimate

    run_btn.click(
        fn=on_fetch,
        inputs=[urls_input, max_comments_input],
        outputs=[download_files, api_info]
    )

if __name__ == "__main__":
    demo.launch()