jithenderchoudary committed on
Commit
e19bc17
·
verified ·
1 Parent(s): 25a7d4e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -0
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pdfplumber
3
+ import pandas as pd
4
+
5
def extract_data(pdf_file, start_pos, end_pos):
    """Export the rows whose "Pos" lies in a range to an Excel workbook.

    The PDF parsing is still a placeholder: the document is opened and the
    text of every page is extracted, but the exported rows come from a
    hard-coded sample table, not from the PDF contents.

    Args:
        pdf_file: The uploaded PDF, passed unchanged to ``pdfplumber.open``.
        start_pos: Lowest "Pos" value to keep (inclusive). ``None`` means
            no lower bound.
        end_pos: Highest "Pos" value to keep (inclusive). ``None`` means
            no upper bound.

    Returns:
        The path of the generated ``extracted_data.xlsx`` file, as a string.
    """
    import tempfile
    from pathlib import Path

    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            # TODO: parse the page text into rows once the PDF layout is
            # known; the extracted text is not used yet.
            page.extract_text()

    # Example rows (replace with the rows parsed from the PDF).
    columns = ["Pos", "Item Code", "Quantity", "Basic Price", "Sub Total"]
    sample_rows = [
        (10, "155569003011", 10, 57.66, 576.60),
        (20, "155569003012", 10, 57.66, 576.60),
        (30, "155569003013", 10, 57.66, 576.60),
    ]

    # A cleared number field arrives as None; treat it as "no bound" instead
    # of failing on the comparison.
    lower = float("-inf") if start_pos is None else start_pos
    upper = float("inf") if end_pos is None else end_pos
    selected = [row for row in sample_rows if lower <= row[0] <= upper]

    df = pd.DataFrame(selected, columns=columns)

    # Each call writes into its own temporary directory so that concurrent
    # requests cannot overwrite one another's workbook, while the download
    # keeps the familiar file name.
    output_path = Path(tempfile.mkdtemp(prefix="extracted_")) / "extracted_data.xlsx"
    df.to_excel(output_path, index=False)

    return str(output_path)
35
+
36
# Gradio interface: upload a PDF, choose the position range, download the
# generated Excel file.
# Components come from the top-level ``gr`` namespace and use ``value=`` for
# the initial value; the older ``gr.inputs`` / ``gr.outputs`` modules and
# their ``default=`` keyword are deprecated and absent from newer releases.
interface = gr.Interface(
    fn=extract_data,
    inputs=[
        gr.File(label="Upload PDF File"),
        gr.Number(value=10, label="Start Position"),
        gr.Number(value=450, label="End Position"),
    ],
    outputs=gr.File(label="Download Extracted Excel"),
)

# Start the web server only when the file is run as a script, so that
# importing this module does not launch it as a side effect.
if __name__ == "__main__":
    interface.launch()