sadickam committed on
Commit
7f934fa
·
verified ·
1 Parent(s): c2c50da

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -18
app.py CHANGED
@@ -77,34 +77,39 @@ def text_to_txt_bytes(text):
77
  except Exception as e:
78
  raise RuntimeError(f"Error during TXT conversion: {e}")
79
 
80
- def on_extract(pdf_file):
81
  """
82
  Callback function to extract text from PDF and return CSV and TXT data.
83
 
84
  Args:
85
- pdf_file (gr.File): Dictionary containing file information.
86
 
87
  Returns:
88
- tuple: CSV bytes and filename, TXT bytes and filename.
89
  """
90
- if pdf_file is None:
91
- return gr.update(), gr.update(), "No file uploaded.", "No file uploaded."
92
 
93
  try:
94
  # Extract text and create DataFrame
95
- df, full_text = extract_text_with_langchain_pdf(pdf_file.name)
96
 
97
  # Convert DataFrame to CSV bytes
98
  csv_bytes = df_to_csv_bytes(df)
99
- csv_filename = f"{pdf_file.name.rsplit('.', 1)[0]}_extracted.csv"
100
 
101
  # Convert full text to TXT bytes
102
  txt_bytes = text_to_txt_bytes(full_text)
103
- txt_filename = f"{pdf_file.name.rsplit('.', 1)[0]}_full_text.txt"
104
 
105
- return csv_bytes, csv_filename, txt_bytes, txt_filename
 
 
 
 
 
106
  except Exception as e:
107
- return gr.update(), gr.update(), f"Extraction failed: {e}", f"Extraction failed: {e}"
108
 
109
  with gr.Blocks() as demo:
110
  gr.Markdown("# 📄 PDF Text Extractor with Metadata and Multiple Exports")
@@ -113,7 +118,7 @@ with gr.Blocks() as demo:
113
  pdf_input = gr.File(
114
  label="Upload PDF",
115
  file_types=[".pdf"],
116
- type="filepath",
117
  interactive=True
118
  )
119
 
@@ -121,15 +126,17 @@ with gr.Blocks() as demo:
121
  extract_button = gr.Button("Extract and Download")
122
 
123
  with gr.Row():
124
- csv_download = gr.Download(
125
- label="Download Extracted CSV"
 
126
  )
127
- txt_download = gr.Download(
128
- label="Download Full Text"
 
129
  )
130
 
131
  with gr.Row():
132
- error_output = gr.Textbox(
133
  label="Status",
134
  interactive=False,
135
  lines=2
@@ -138,12 +145,12 @@ with gr.Blocks() as demo:
138
  extract_button.click(
139
  fn=on_extract,
140
  inputs=pdf_input,
141
- outputs=[csv_download, txt_download, error_output, error_output]
142
  )
143
 
144
  gr.Markdown("""
145
  ---
146
- Developed Gradio and LangChain.
147
  """)
148
 
149
  # Launch the Gradio app
 
77
  except Exception as e:
78
  raise RuntimeError(f"Error during TXT conversion: {e}")
79
 
80
+ def on_extract(pdf_file_path):
81
  """
82
  Callback function to extract text from PDF and return CSV and TXT data.
83
 
84
  Args:
85
+ pdf_file_path (str): The file path to the uploaded PDF.
86
 
87
  Returns:
88
+ tuple: CSV download object, TXT download object, Status message.
89
  """
90
+ if not pdf_file_path:
91
+ return None, None, "No file uploaded."
92
 
93
  try:
94
  # Extract text and create DataFrame
95
+ df, full_text = extract_text_with_langchain_pdf(pdf_file_path)
96
 
97
  # Convert DataFrame to CSV bytes
98
  csv_bytes = df_to_csv_bytes(df)
99
+ csv_filename = f"{pdf_file_path.rsplit('/', 1)[-1].rsplit('.', 1)[0]}_extracted.csv"
100
 
101
  # Convert full text to TXT bytes
102
  txt_bytes = text_to_txt_bytes(full_text)
103
+ txt_filename = f"{pdf_file_path.rsplit('/', 1)[-1].rsplit('.', 1)[0]}_full_text.txt"
104
 
105
+ # Return CSV and TXT files along with a success message
106
+ return (
107
+ (csv_bytes, csv_filename),
108
+ (txt_bytes, txt_filename),
109
+ "Extraction successful!"
110
+ )
111
  except Exception as e:
112
+ return None, None, f"Extraction failed: {e}"
113
 
114
  with gr.Blocks() as demo:
115
  gr.Markdown("# 📄 PDF Text Extractor with Metadata and Multiple Exports")
 
118
  pdf_input = gr.File(
119
  label="Upload PDF",
120
  file_types=[".pdf"],
121
+ type="filepath", # Using "filepath" as per Gradio's valid options
122
  interactive=True
123
  )
124
 
 
126
  extract_button = gr.Button("Extract and Download")
127
 
128
  with gr.Row():
129
+ csv_download = gr.File(
130
+ label="Download Extracted CSV",
131
+ interactive=False
132
  )
133
+ txt_download = gr.File(
134
+ label="Download Full Text",
135
+ interactive=False
136
  )
137
 
138
  with gr.Row():
139
+ status_output = gr.Textbox(
140
  label="Status",
141
  interactive=False,
142
  lines=2
 
145
  extract_button.click(
146
  fn=on_extract,
147
  inputs=pdf_input,
148
+ outputs=[csv_download, txt_download, status_output]
149
  )
150
 
151
  gr.Markdown("""
152
  ---
153
+ Developed with ❤️ using Gradio and LangChain.
154
  """)
155
 
156
  # Launch the Gradio app