check_url / app.py
viboognesh's picture
Create app.py
c19e33b verified
raw
history blame
1.79 kB
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader
import os
from dotenv import load_dotenv
load_dotenv()
import tempfile
import requests
import streamlit as st
# Sample PDF path. NOTE(review): nothing visible in this file reads this
# module-level name (check_pdf's parameter of the same name shadows it), so it
# looks like a leftover from local testing; confirm before removing.
read_file_path = "160/task_for_you.pdf"
def check_pdf(read_file_path):
    """Report whether the file at *read_file_path* loads into any documents.

    The file is loaded through ``SimpleDirectoryReader`` with a ``LlamaParse``
    parser (markdown result type) registered for the ``.pdf`` extension. The
    API key is read from the ``LLAMA_CLOUD_API_KEY`` environment variable.

    Args:
        read_file_path: Path of the file to load.

    Returns:
        True when loading returns anything other than an empty list; False
        when the result is an empty list or when any exception is raised
        along the way (the exception is printed, not propagated).
    """
    try:
        pdf_parser = LlamaParse(
            result_type="markdown",
            api_key=os.environ['LLAMA_CLOUD_API_KEY'],
            ignore_errors=False,
        )
        reader = SimpleDirectoryReader(
            input_files=[read_file_path],
            file_extractor={".pdf": pdf_parser},
        )
        documents = reader.load_data()
        # An empty result is funnelled through the same handler as every
        # other failure so it is reported and mapped to False.
        if documents == []:
            raise Exception("No markdown data found")
    except Exception as e:
        print(f"An error occurred: {e}")
        return False
    return True
def download_file_from_url(url, filename, timeout=30):
    """Download *url* to *filename* and report whether it succeeded.

    Args:
        url: Address to fetch with an HTTP GET.
        filename: Destination path; a missing parent directory is created.
        timeout: Seconds to wait for the server to connect or send data
            before giving up, so an unresponsive host cannot block forever.

    Returns:
        True if the server answered with status 200 and the body was written
        to *filename*; False on any other status code or on a request error
        (malformed URL, connection failure, timeout).
    """
    print(f"Downloading file from {url} to {filename}")
    # dirname is "" for a bare filename, and os.makedirs("") raises.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    try:
        # With stream=True the connection stays open until the response is
        # closed; the context manager releases it on every exit path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download file. Status code: {response.status_code}")
                return False
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024):
                    file.write(chunk)
    except requests.RequestException as exc:
        # The URL is user-supplied, so bad schemes, DNS failures and timeouts
        # are expected; report them through the boolean result like any
        # other failed download instead of raising.
        print(f"Failed to download file. Error: {exc}")
        return False
    print(f"File downloaded and saved as {filename}")
    return True
# Streamlit entry point: ask for a URL, download it into a throwaway
# directory, and report whether the downloaded file passes check_pdf.
url = st.text_input("Enter URL", key="url")
if url:
    # The temporary directory (and the downloaded file) is removed when the
    # with-block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        pdf_path = os.path.join(temp_dir, "task_for_you.pdf")
        if not download_file_from_url(url, pdf_path):
            st.error("Failed to download file")
        elif check_pdf(pdf_path):
            st.success("File downloaded successfully")
        else:
            st.error("File is corrupted or not a PDF file")