Spaces:

7jimmy
/

web_scrapper

Sleeping

App Files Files Community

web_scrapper / app.py

7jimmy

Update app.py

665578d verified almost 2 years ago

raw

history blame contribute delete

2.16 kB

	#!/usr/bin/python3
	__author__ = "https://codeberg.org/allendema"

	from bs4 import BeautifulSoup
	import requests
	import os
	import json
	import sys
	import time
	import gradio as gr

	def get_user_media(username):
	    """Download the story media listed on a user's public Snapchat story page.

	    The page ``https://story.snapchat.com/@<username>`` is fetched, the JSON
	    held in its ``__NEXT_DATA__`` element is parsed, and every entry of
	    ``props.pageProps.story.snapList`` that has a media URL is saved into the
	    local ``media`` folder. Files are named after the response's ``ETag``
	    header, so media that is already on disk is not written again.

	    Args:
	        username: Snapchat username whose story page is fetched.

	    Raises:
	        ValueError: If ``username`` is empty, or the page carries no
	            ``__NEXT_DATA__`` payload.
	        ConnectionError: If the story page request is not successful.
	    """
	    RED = "\033[31m"

	    headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:94.0) Gecko/20100101 Firefox/103.0.2'}
	    base_url = "https://story.snapchat.com/@"
	    # Without a timeout a stalled connection would block the caller forever.
	    request_timeout = 30

	    username = (username or "").strip()
	    if not username:
	        raise ValueError("A Snapchat username is required.")

	    media_folder = "media"
	    os.makedirs(media_folder, exist_ok=True)

	    page = requests.get(base_url + username, headers=headers, timeout=request_timeout)
	    if not page.ok:
	        # Raise instead of sys.exit(): SystemExit is not an Exception subclass,
	        # so callers using ``except Exception`` could not report the failure.
	        raise ConnectionError("Oh Snap! No connection with Snap!")

	    soup = BeautifulSoup(page.content, "html.parser")
	    next_data = soup.find(id="__NEXT_DATA__")
	    if next_data is None or not next_data.string:
	        raise ValueError("Could not find story data on the Snapchat page.")
	    data = json.loads(next_data.string.strip())

	    # Only the lookup of the snap list decides whether stories exist; keeping
	    # the try this narrow stops unrelated KeyErrors from being misreported.
	    try:
	        snap_list = data["props"]["pageProps"]["story"]["snapList"]
	    except (KeyError, TypeError):
	        snap_list = None
	    if not snap_list:
	        print(f"{RED}No user stories found for the last 24h.")
	        return

	    for snap in snap_list:
	        file_url = (snap.get("snapUrls") or {}).get("mediaUrl")
	        if not file_url:
	            continue

	        # The context manager releases the streamed connection on every path,
	        # including the early ``continue`` branches below.
	        with requests.get(file_url, stream=True, headers=headers, timeout=request_timeout) as media:
	            if media.status_code != 200:
	                print("Cannot make connection to download media!")
	                continue

	            content_type = media.headers.get("Content-Type", "")
	            if "image" in content_type:
	                extension = ".jpeg"
	            elif "video" in content_type:
	                extension = ".mp4"
	            else:
	                # Unknown media type: there is no sensible file name for it.
	                continue

	            etag = media.headers.get("ETag")
	            if not etag:
	                print("Cannot determine a file name for media without an ETag!")
	                continue

	            # The header comes from the remote server; basename() keeps a
	            # value such as W/"abc" or ../x from escaping the media folder.
	            safe_name = os.path.basename(etag.replace('"', ''))
	            file_name = os.path.join(media_folder, safe_name + extension)

	            if os.path.isfile(file_name):
	                continue

	            # Short pause between downloads.
	            time.sleep(0.3)

	            with open(file_name, 'wb') as f:
	                for chunk in media.iter_content(chunk_size=8192):
	                    f.write(chunk)

	    print("\nAt least one Story found. Successfully Downloaded.")

	def gr_interface(username):
	    """Run the scraper for ``username`` and return a status message.

	    Args:
	        username: Text entered by the user; surrounding whitespace is ignored.

	    Returns:
	        A success message naming the user, or a string starting with
	        ``"Error: "`` describing why nothing was downloaded.
	    """
	    username = (username or "").strip()
	    if not username:
	        # Reject blank input up front instead of sending a request for it.
	        return "Error: please enter a Snapchat username."

	    try:
	        get_user_media(username)
	    except (Exception, SystemExit) as e:
	        # SystemExit is caught as well so that a sys.exit() inside the scraper
	        # surfaces as a message instead of escaping this handler.
	        return f"Error: {str(e)}"

	    return f"Successfully downloaded media for {username}. Check the 'media' folder."

	# Wrap gr_interface in a Gradio UI with a text input and a text output,
	# then start serving it.
	iface = gr.Interface(
	    fn=gr_interface,
	    inputs="text",
	    outputs="text",
	)
	iface.launch()