File size: 4,643 Bytes
dea18bb
3b64d7b
 
 
 
 
526a62f
dea18bb
 
c5aba08
dea18bb
814c168
dea18bb
814c168
6a9e71a
814c168
526a62f
814c168
 
 
 
6a9e71a
3b64d7b
526a62f
3b64d7b
 
 
 
6a9e71a
 
3b64d7b
6a9e71a
3b64d7b
 
 
 
6a9e71a
814c168
3b64d7b
814c168
 
 
3b64d7b
814c168
6a9e71a
814c168
 
 
 
3b64d7b
814c168
6a9e71a
814c168
6a9e71a
814c168
 
 
 
 
 
 
 
 
 
526a62f
3b64d7b
 
814c168
6a9e71a
3b64d7b
6a9e71a
 
3b64d7b
814c168
6a9e71a
 
 
 
 
526a62f
6a9e71a
 
3b64d7b
 
6a9e71a
 
 
 
 
 
526a62f
3b64d7b
 
814c168
6a9e71a
 
814c168
6a9e71a
3b64d7b
814c168
dea18bb
526a62f
 
 
 
 
6a9e71a
3b64d7b
9572f33
526a62f
dea18bb
6a9e71a
3b64d7b
 
38e720f
3b64d7b
 
 
 
 
38e720f
 
dea18bb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
Gradio app: Instagram Comments Downloader → XLSX
- Accepts single or multiple post URLs
- Resolves Media ID automatically
- Fetches comments using official Instagram Graph API
- Provides XLSX files for download
- Shows estimated API calls (credit usage)
"""

import os
import requests
import pandas as pd
import gradio as gr
from io import BytesIO
from urllib.parse import urlparse

# Instagram API token stored as HF Space secret
# Read once at import time; every Graph API call below reuses this value.
ACCESS_TOKEN = os.environ.get("INSTAGRAM_API_TOKEN")
if not ACCESS_TOKEN:
    # Fail fast at startup: without a token every API call would 4xx anyway.
    raise ValueError("Set the Instagram API token in Space secrets as INSTAGRAM_API_TOKEN")

def resolve_shortcode_to_media_id(shortcode: str) -> str:
    """Resolve an Instagram post shortcode to its numeric media ID.

    Queries the Facebook Graph API ``instagram_oembed`` endpoint using the
    module-level ``ACCESS_TOKEN``.

    Args:
        shortcode: The shortcode part of a post URL
            (e.g. ``CODE1`` in ``https://www.instagram.com/p/CODE1/``).

    Returns:
        The ``media_id`` string reported by the API.

    Raises:
        ValueError: If the API responds with a non-200 status or the
            response body lacks a ``media_id`` field.
        requests.RequestException: On network-level failures (timeout, DNS, ...).
    """
    url = "https://graph.facebook.com/v19.0/instagram_oembed"
    params = {
        "url": f"https://www.instagram.com/p/{shortcode}/",
        "access_token": ACCESS_TOKEN
    }
    # Explicit timeout so a stalled connection cannot hang the UI forever.
    resp = requests.get(url, params=params, timeout=30)
    if resp.status_code != 200:
        raise ValueError(f"Error resolving URL {shortcode}: {resp.status_code} {resp.text}")
    data = resp.json()
    media_id = data.get("media_id")
    if not media_id:
        raise ValueError(f"Could not resolve media ID for shortcode {shortcode}")
    return media_id

def fetch_instagram_comments(media_id: str, max_comments: int = 200):
    """Fetch up to ``max_comments`` comments for a single post.

    Follows Graph API cursor pagination until the cap is reached or no
    further page exists.

    Args:
        media_id: Numeric media ID of the post.
        max_comments: Upper bound on the number of comments collected.

    Returns:
        Tuple of (``pandas.DataFrame`` with columns id/username/text/timestamp,
        number of HTTP requests issued — the caller's credit estimate).

    Raises:
        ValueError: If any page request returns a non-200 status.
    """
    columns = ["id", "username", "text", "timestamp"]
    comments_list = []
    api_calls = 0
    next_url = f"https://graph.facebook.com/v19.0/{media_id}/comments"
    # Only the FIRST request needs explicit params: the "paging.next" URL
    # returned by the API already embeds the full query string, and sending
    # params again would duplicate access_token/fields/limit parameters.
    params = {"fields": "id,username,text,timestamp", "limit": 100, "access_token": ACCESS_TOKEN}

    while next_url and len(comments_list) < max_comments:
        resp = requests.get(next_url, params=params, timeout=30)
        params = None  # subsequent pages carry their own query string
        api_calls += 1
        if resp.status_code != 200:
            raise ValueError(f"Error fetching comments: {resp.status_code} {resp.text}")
        data = resp.json()
        for c in data.get("data", []):
            comments_list.append({
                "id": c.get("id"),
                "username": c.get("username"),
                "text": c.get("text"),
                "timestamp": c.get("timestamp")
            })
            if len(comments_list) >= max_comments:
                break
        next_url = data.get("paging", {}).get("next")

    df = pd.DataFrame(comments_list, columns=columns)
    return df, api_calls

def process_instagram_urls(urls_text, max_comments=200):
    """Download comments for each post URL and write one XLSX file per post.

    Args:
        urls_text: Newline- and/or comma-separated Instagram post URLs.
        max_comments: Cap on comments fetched per post.

    Returns:
        Tuple of (list of XLSX file paths, status message). On the first
        failure an empty list and an error message are returned instead.
    """
    import tempfile  # local import: only needed when files are produced

    urls = [u.strip() for u in urls_text.replace(",", "\n").split("\n") if u.strip()]
    files = []
    total_api_calls = 0

    for url in urls:
        try:
            parsed = urlparse(url)
            path_parts = parsed.path.strip("/").split("/")
            # Accept only the canonical /p/<shortcode>/ post form.
            if len(path_parts) < 2 or path_parts[0] != "p":
                return [], f"Invalid Instagram post URL: {url}"
            shortcode = path_parts[1]
            media_id = resolve_shortcode_to_media_id(shortcode)
            df, api_calls = fetch_instagram_comments(media_id, max_comments)
            total_api_calls += api_calls
            # gr.Files expects real file paths, not in-memory buffers, so
            # write the workbook to a named temporary file and return its path.
            # The prefix keeps the download name recognizable in the UI.
            tmp = tempfile.NamedTemporaryFile(
                prefix=f"instagram_{shortcode}_comments_",
                suffix=".xlsx",
                delete=False,
            )
            tmp.close()
            df.to_excel(tmp.name, index=False, engine='openpyxl')
            files.append(tmp.name)
        except Exception as e:
            return [], f"Error processing {url}: {e}"

    return files, f"Estimated API calls used: {total_api_calls}"

# --- Gradio UI ---
with gr.Blocks(title="Instagram Comments Downloader") as demo:
    gr.Markdown(
        "## Instagram Comments → XLSX\n"
        "Paste one or multiple Instagram post URLs (newline or comma separated) and fetch comments."
    )

    urls_input = gr.Textbox(
        label="Instagram Post URLs",
        lines=5,
        placeholder="https://www.instagram.com/p/CODE1/\nhttps://www.instagram.com/p/CODE2/"
    )
    max_comments_input = gr.Number(value=200, label="Max comments per post")
    run_btn = gr.Button("Fetch Comments")
    download_files = gr.Files(label="Download XLSX files", file_types=[".xlsx"])
    api_info = gr.Textbox(label="Estimated API Calls / Status", interactive=False)

    def on_fetch(urls_text, max_comments):
        """Click handler: normalize the comment cap and delegate to the pipeline."""
        # gr.Number yields None when the field is cleared; fall back to the
        # declared default instead of crashing on int(None).
        limit = int(max_comments) if max_comments else 200
        files, api_estimate = process_instagram_urls(urls_text, limit)
        return files, api_estimate

    run_btn.click(
        fn=on_fetch,
        inputs=[urls_input, max_comments_input],
        outputs=[download_files, api_info]
    )

if __name__ == "__main__":
    demo.launch()