web_scrapper / app.py
7jimmy's picture
Update app.py
665578d verified
#!/usr/bin/python3
__author__ = "https://codeberg.org/allendema"
from bs4 import BeautifulSoup
import requests
import os
import json
import sys
import time
import gradio as gr
def get_user_media(username):
    """Download the media of a user's public Snapchat story into ``media/``.

    The story page for ``username`` is fetched, the JSON embedded in the
    ``__NEXT_DATA__`` element is parsed, and every snap that exposes a media
    URL is saved as ``media/<ETag>.jpeg`` or ``media/<ETag>.mp4`` depending on
    the response's Content-Type. Files that already exist are not downloaded
    again.

    Args:
        username: Snapchat username, appended to the story base URL.

    Returns:
        None. Progress and "nothing found" notices are printed to stdout.

    Raises:
        ConnectionError: The story page did not return a successful status.
        ValueError: The page does not contain the ``__NEXT_DATA__`` payload.
        requests.RequestException: A request failed or timed out.
    """
    RED = "\033[31m"
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/103.0.2'}
    base_url = "https://story.snapchat.com/@"
    media_folder = "media"
    request_timeout = 30  # seconds; keeps a stalled server from hanging the UI
    os.makedirs(media_folder, exist_ok=True)

    page = requests.get(base_url + username, headers=headers, timeout=request_timeout)
    if not page.ok:
        # Raise instead of sys.exit(): SystemExit is not an Exception, so a
        # caller's `except Exception` could never report it.
        raise ConnectionError("Oh Snap! No connection with Snap!")

    soup = BeautifulSoup(page.content, "html.parser")
    next_data = soup.find(id="__NEXT_DATA__")
    if next_data is None or not next_data.string:
        raise ValueError("Could not find story data on the Snapchat page.")
    data = json.loads(next_data.string.strip())

    # Only the lookup of the snap list signals "no story"; keeping the try
    # this narrow stops unrelated KeyErrors from being misreported.
    try:
        snap_list = data["props"]["pageProps"]["story"]["snapList"]
    except (KeyError, TypeError):
        snap_list = None
    if not snap_list:
        print(f"{RED}No user stories found for the last 24h.")
        return

    for snap in snap_list:
        file_url = (snap.get("snapUrls") or {}).get("mediaUrl")
        if not file_url:
            continue

        # The context manager releases the streamed connection even when the
        # body is never read (skipped or failed downloads).
        with requests.get(file_url, stream=True, headers=headers,
                          timeout=request_timeout) as media:
            if media.status_code != 200:
                print("Cannot make connection to download media!")
                continue

            content_type = media.headers.get("Content-Type", "")
            if "image" in content_type:
                extension = ".jpeg"
            elif "video" in content_type:
                extension = ".mp4"
            else:
                # Neither image nor video: there is no sensible file name.
                continue

            etag = media.headers.get("ETag", "").replace('"', '')
            # The ETag comes from the remote server; neutralise path
            # separators (e.g. a weak `W/` prefix) before using it as a name.
            for separator in ("/", "\\"):
                etag = etag.replace(separator, "_")
            if not etag:
                continue

            file_name = os.path.join(media_folder, etag + extension)
            if os.path.isfile(file_name):
                continue

            time.sleep(0.3)  # brief pause between downloads
            with open(file_name, 'wb') as f:
                for chunk in media.iter_content(chunk_size=8192):
                    f.write(chunk)

    print("\nAt least one Story found. Successfully Downloaded.")
def gr_interface(username):
    """Gradio callback: download a user's story media and report the outcome.

    Args:
        username: Snapchat username forwarded to ``get_user_media``.

    Returns:
        A success message naming the user, or ``"Error: <details>"`` when
        ``get_user_media`` raises an ``Exception``.
    """
    try:
        get_user_media(username)
        outcome = f"Successfully downloaded media for {username}. Check the 'media' folder."
    except Exception as err:
        # UI boundary: turn any failure into text the interface can display.
        outcome = "Error: " + str(err)
    return outcome
# Expose the download callback through a minimal text-in / text-out Gradio UI
# and launch it when this module is executed.
iface = gr.Interface(
    fn=gr_interface,
    inputs="text",
    outputs="text",
)
iface.launch()