Spaces:
Sleeping
Sleeping
| from llama_parse import LlamaParse | |
| from llama_index.core import SimpleDirectoryReader | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| import tempfile | |
| import requests | |
| import streamlit as st | |
| read_file_path = "160/task_for_you.pdf" | |
def check_pdf(read_file_path):
    """Report whether the file at ``read_file_path`` can be parsed as a PDF.

    The file is loaded through ``SimpleDirectoryReader`` with a
    ``LlamaParse`` extractor registered for the ``.pdf`` extension. The API
    key is read from the ``LLAMA_CLOUD_API_KEY`` environment variable.

    Args:
        read_file_path: Path of the file to load.

    Returns:
        True when loading yields a non-empty result. False when the result
        is an empty list or when any exception is raised along the way
        (including a missing API key); the error is printed in that case.
    """
    try:
        llama_parser = LlamaParse(
            result_type="markdown",
            api_key=os.environ['LLAMA_CLOUD_API_KEY'],
            ignore_errors=False,
        )
        reader = SimpleDirectoryReader(
            input_files=[read_file_path],
            file_extractor={".pdf": llama_parser},
        )
        documents = reader.load_data()
        # An empty result is routed through the same handler as real errors
        # so that it is reported and mapped to False in one place.
        if documents == []:
            raise Exception("No markdown data found")
    except Exception as err:
        print(f"An error occurred: {err}")
        return False
    return True
def download_file_from_url(url, filename, timeout=30):
    """Download ``url`` to ``filename`` and report whether it succeeded.

    Args:
        url: Address to fetch with an HTTP GET.
        filename: Destination path. Missing parent directories are created;
            a bare file name (no directory part) is written to the current
            working directory.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the caller indefinitely.

    Returns:
        True when the server answered with status 200 and the body was
        written to ``filename``. False for any other status code, for a
        request error (malformed URL, connection failure, timeout), or for
        a filesystem error; the reason is printed.
    """
    # NOTE(review): if ``url`` originates from end users, this fetch runs with
    # the host's network access; consider restricting the allowed targets.
    print(f"Downloading file from {url} to {filename}")
    try:
        # dirname is "" for a bare file name, and os.makedirs("") raises.
        parent_dir = os.path.dirname(filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        # Using the response as a context manager releases the streamed
        # connection on every exit path.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download file. Status code: {response.status_code}")
                return False
            with open(filename, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024):
                    file.write(chunk)
    except (requests.RequestException, OSError) as error:
        # Report failures through the boolean result instead of raising.
        print(f"Failed to download file. Error: {error}")
        return False
    print(f"File downloaded and saved as {filename}")
    return True
# Streamlit entry point: ask for a URL, download it into a throwaway
# directory, and report whether the downloaded file parses as a PDF.
url = st.text_input("Enter URL", key="url")
if url:
    # The temporary directory (and the downloaded file) is removed when the
    # with-block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        pdf_path = os.path.join(temp_dir, "task_for_you.pdf")
        if not download_file_from_url(url, pdf_path):
            st.error("Failed to download file")
        elif check_pdf(pdf_path):
            st.success("File downloaded successfully")
        else:
            st.error("File is corrupted or not a PDF file")