jithenderchoudary committed on
Commit
18bd25a
·
verified ·
1 Parent(s): c2f4a04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -24
app.py CHANGED
@@ -3,32 +3,42 @@ import pdfplumber
3
  import pandas as pd
4
 
5
  def extract_data(pdf_file_path, start_pos, end_pos):
6
- # Load PDF and extract specific rows
7
- with pdfplumber.open(pdf_file_path) as pdf:
8
- data = []
9
- for page in pdf.pages:
10
- text = page.extract_text()
11
- # Parsing and extracting rows within the specified range goes here.
 
 
 
 
 
12
 
13
- # Example extracted data (replace with actual extraction logic)
14
- extracted_data = {
15
- "Pos": [10, 20, 30], # Replace with actual data
16
- "Item Code": ["155569003011", "155569003012", "155569003013"],
17
- "Quantity": [10, 10, 10],
18
- "Basic Price": [57.66, 57.66, 57.66],
19
- "Sub Total": [576.60, 576.60, 576.60]
20
- }
21
-
22
- # Convert extracted data to a DataFrame
23
- df = pd.DataFrame(extracted_data)
24
-
25
- # Save DataFrame to Excel
26
- output_path = "/tmp/extracted_data.xlsx"
27
- df.to_excel(output_path, index=False)
 
 
 
28
 
29
- return output_path
 
 
30
 
31
- # Updated Gradio interface using current components
32
  interface = gr.Interface(
33
  fn=extract_data,
34
  inputs=[
@@ -40,4 +50,3 @@ interface = gr.Interface(
40
  )
41
 
42
  interface.launch()
43
-
 
3
  import pandas as pd
4
 
5
  def extract_data(pdf_file_path, start_pos, end_pos):
6
+ try:
7
+ # Attempt to load and process the PDF
8
+ with pdfplumber.open(pdf_file_path) as pdf:
9
+ data = []
10
+ for page in pdf.pages:
11
+ text = page.extract_text()
12
+ if text is None:
13
+ return "Error: Could not extract text from the PDF. Please check the file format."
14
+
15
+ # Print text for debugging
16
+ print(text) # Debugging line
17
 
18
+ # Sample data collection logic; replace with actual parsing logic
19
+ # Here you would filter by 'Pos' values within start_pos to end_pos
20
+
21
+ # Example data to simulate output
22
+ extracted_data = {
23
+ "Pos": [10, 20, 30], # Replace with actual data
24
+ "Item Code": ["155569003011", "155569003012", "155569003013"],
25
+ "Quantity": [10, 10, 10],
26
+ "Basic Price": [57.66, 57.66, 57.66],
27
+ "Sub Total": [576.60, 576.60, 576.60]
28
+ }
29
+
30
+ # Convert to DataFrame and save to Excel
31
+ df = pd.DataFrame(extracted_data)
32
+ output_path = "/tmp/extracted_data.xlsx"
33
+ df.to_excel(output_path, index=False)
34
+
35
+ return output_path
36
 
37
+ except Exception as e:
38
+ # Return the error message if an exception occurs
39
+ return f"Error: {e}"
40
 
41
+ # Gradio interface setup
42
  interface = gr.Interface(
43
  fn=extract_data,
44
  inputs=[
 
50
  )
51
 
52
  interface.launch()