redact_pdf / app.py
Darkaido001's picture
change pdf reader in langchain
6342db2
from warnings import showwarning
from langchain_community.document_loaders import PyMuPDFLoader
from pathlib import Path
import gradio as gr
import requests
import logging
import os
from tqdm import tqdm
# Root logging configuration: records go to "gfgnewlog.log", which is
# truncated on every start (filemode="w"), as "<time> - <level> - <message>".
logging.basicConfig(
    filename="gfgnewlog.log",
    filemode="w",
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Module-level logger used by the handlers below.
logger = logging.getLogger(__name__)

# Additionally send this logger's records to 'logs.log'.
file_handler = logging.FileHandler('logs.log')
logger.addHandler(file_handler)
def upload_file(filepath, progress=gr.Progress()):
    """Extract the text of an uploaded PDF, post it to the API and save the reply.

    The text of every page is concatenated and sent as ``{"text": ...}`` (JSON)
    to the endpoint named by the ``url`` environment variable. The response
    body is written to ``./data_file/<uploaded file stem>.txt``.

    Args:
        filepath: Path of the uploaded file, as supplied by the upload button.
        progress: Gradio progress tracker used to show page-loading progress.

    Returns:
        A three-item list for the ``[upload button, download button, label]``
        outputs: the upload button hidden, the download button shown and
        pointing at the written file, and the status text ``"Loaded"``.

    Raises:
        gr.Error: If the ``url`` environment variable is unset, or the API
            request fails or returns an HTTP error status.
    """
    source_name = Path(filepath).name

    # NOTE(review): a str path is accepted by every PyMuPDFLoader release;
    # older releases appear not to accept Path objects - confirm if pinned.
    loader = PyMuPDFLoader(str(filepath))
    pages = loader.load()

    # Join once instead of growing a string with += on every page.
    text = "".join(
        page.page_content for page in progress.tqdm(pages, desc="Loading...")
    )
    logger.warning("pdf extracted")

    # API fetch
    url = os.getenv("url")
    if not url:
        logger.error("environment variable 'url' is not set")
        raise gr.Error("The text-processing service is not configured.")
    try:
        # NOTE(review): no timeout is set, so a stalled server blocks this
        # handler indefinitely - consider passing timeout= once an acceptable
        # upper bound for the API is known.
        response = requests.post(url, json={"text": text})
        # Without this check an error page would be saved as the result file.
        response.raise_for_status()
    except requests.RequestException as exc:
        logger.exception("API request failed")
        raise gr.Error("The text-processing service request failed.") from exc
    logger.warning("API EXtraction done")

    # Path.stem only drops the final suffix, so "a.b.pdf" becomes "a.b.txt"
    # rather than being cut at the first dot.
    output_file = Path(source_name).stem + ".txt"
    folder_path = "./data_file"
    # exist_ok avoids the check-then-create race between concurrent uploads.
    os.makedirs(folder_path, exist_ok=True)

    main_txt_path = os.path.join(folder_path, output_file)
    # Explicit UTF-8 so the output does not depend on the platform locale.
    with open(main_txt_path, "w", encoding="utf-8") as file:
        file.write(response.text)
    logger.warning("writing txt done")

    return [
        gr.UploadButton(visible=False),
        gr.DownloadButton(
            label=f"Download {output_file}", value=main_txt_path, visible=True
        ),
        "Loaded",
    ]
def download_file():
    """Swap the buttons back after the download button is clicked.

    Returns:
        A two-item list for the ``[upload button, download button]`` outputs:
        the upload button made visible and the download button hidden.
    """
    upload_button = gr.UploadButton(visible=True)
    download_button = gr.DownloadButton(visible=False)
    return [upload_button, download_button]
# UI definition: an instruction line, an upload button, a download button that
# stays hidden until a result exists, and a status label.
with gr.Blocks() as demo:
    gr.Markdown("First upload a pdf file and then you'll be able to download the txt file")
    with gr.Row():
        u = gr.UploadButton("Upload a file", file_count="single")
        d = gr.DownloadButton("Download the file", visible=False)
        # NOTE(review): assumed to sit inside the row with the buttons - confirm.
        label = gr.Label(label="Loader")

    # Uploading runs upload_file, which updates both buttons and the label.
    u.upload(upload_file, u, [u, d, label])
    # Clicking download runs download_file, which resets the two buttons.
    d.click(download_file, None, [u, d])
if __name__ == "__main__":
    # Login credentials come from the environment.
    username = os.getenv("username")
    password = os.getenv("Password")
    # Fail fast: launching with auth=(None, None) would start an app whose
    # login can never succeed.
    if not username or not password:
        raise RuntimeError(
            "Set the 'username' and 'Password' environment variables before launching."
        )
    demo.launch(
        auth=(username, password),
        auth_message="If you don't have Login Credentials then please contact on IT Team",
    )