Spaces:

viboognesh
/

check_url

Sleeping

check_url / app.py

Create app.py

c19e33b verified over 1 year ago

1.79 kB

	from llama_parse import LlamaParse
	from llama_index.core import SimpleDirectoryReader
	import os
	from dotenv import load_dotenv
	load_dotenv()
	import tempfile
	import requests
	import streamlit as st

	# Sample PDF path. Nothing in the visible code reads this module-level value:
	# check_pdf's parameter of the same name shadows it inside that function.
	# NOTE(review): looks like a leftover from local testing — confirm before removing.
	read_file_path = "160/task_for_you.pdf"
	def check_pdf(read_file_path):
	    """Report whether the file at ``read_file_path`` can be loaded via LlamaParse.

	    The file is read through ``SimpleDirectoryReader`` with a LlamaParse
	    markdown parser registered for the ``.pdf`` extension.

	    Args:
	        read_file_path: Path of the file to check.

	    Returns:
	        True when loading yields at least one document. False when the
	        ``LLAMA_CLOUD_API_KEY`` environment variable is missing or empty,
	        when loading raises, or when nothing is returned. Failures are
	        printed, never raised.
	    """
	    # Check the key explicitly so a missing variable produces a readable
	    # message instead of the bare KeyError repr.
	    api_key = os.environ.get("LLAMA_CLOUD_API_KEY")
	    if not api_key:
	        print("An error occurred: LLAMA_CLOUD_API_KEY is not set")
	        return False

	    try:
	        # NOTE(review): ignore_errors=False presumably makes parse failures
	        # raise instead of being skipped — confirm against the LlamaParse docs.
	        parser = LlamaParse(result_type="markdown", api_key=api_key, ignore_errors=False)
	        markdown_data = SimpleDirectoryReader(
	            input_files=[read_file_path],
	            file_extractor={".pdf": parser},
	        ).load_data()
	    except Exception as e:
	        # Broad on purpose: this function is a yes/no probe, so any failure
	        # while loading or parsing counts as "not a usable PDF".
	        print(f"An error occurred: {e}")
	        return False

	    if not markdown_data:
	        print("An error occurred: No markdown data found")
	        return False
	    return True

	def download_file_from_url(url, filename):
	    """Download ``url`` over HTTP(S) and save the response body to ``filename``.

	    Args:
	        url: Address to fetch; must start with ``http://`` or ``https://``.
	        filename: Destination path. Its parent directory is created if needed.

	    Returns:
	        True when the server answers 200 and the body has been written to
	        ``filename``. False for an unsupported URL, a request error, or any
	        other status code. Progress and failures are printed.
	    """
	    # The URL comes straight from user input: reject anything that is not
	    # http(s) up front so bad input yields False instead of an exception.
	    if not url or not url.lstrip().lower().startswith(("http://", "https://")):
	        print(f"Failed to download file. Unsupported URL: {url}")
	        return False

	    print(f"Downloading file from {url} to {filename}")
	    parent_dir = os.path.dirname(filename)
	    if parent_dir:
	        # os.makedirs("") raises FileNotFoundError for a bare file name.
	        os.makedirs(parent_dir, exist_ok=True)

	    try:
	        # The timeout keeps a stalled server from blocking forever; the
	        # with-block releases the streamed connection on every path.
	        with requests.get(url, stream=True, timeout=30) as response:
	            if response.status_code != 200:
	                print(f"Failed to download file. Status code: {response.status_code}")
	                return False
	            with open(filename, 'wb') as file:
	                for chunk in response.iter_content(chunk_size=1024):
	                    file.write(chunk)
	    except requests.RequestException as e:
	        # Connection errors, timeouts and malformed URLs are reported the
	        # same way as a bad status code: by returning False.
	        print(f"Failed to download file. Request error: {e}")
	        return False

	    print(f"File downloaded and saved as {filename}")
	    return True

	# Streamlit flow: ask for a URL, download it into a scratch directory that is
	# removed afterwards, then report whether the file could be parsed as a PDF.
	url = st.text_input("Enter URL", key="url")

	if url:
	    with tempfile.TemporaryDirectory() as temp_dir:
	        pdf_path = os.path.join(temp_dir, "task_for_you.pdf")
	        if not download_file_from_url(url, pdf_path):
	            st.error("Failed to download file")
	        elif check_pdf(pdf_path):
	            st.success("File downloaded successfully")
	        else:
	            st.error("File is corrupted or not a PDF file")