jithenderchoudary committed on
Commit
2c84c62
·
verified ·
1 Parent(s): aa7072d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -12
app.py CHANGED
@@ -4,18 +4,14 @@ import pandas as pd
4
 
5
  def extract_data(pdf_file, start_pos, end_pos):
6
  # Load PDF and extract specific rows
7
- with pdfplumber.open(pdf_file) as pdf:
8
  # Code to extract the specified range from Position 10 to Position 450 goes here
9
- # Example processing:
10
- # Initialize list to store extracted data
11
  data = []
12
  for page in pdf.pages:
13
  text = page.extract_text()
14
- # Parse the text to identify positions and extract required rows.
15
- # You’ll customize this part based on your PDF structure.
16
- # Append rows within the specified range to data list
17
 
18
- # Example data structure (replace with actual extracted data)
19
  extracted_data = {
20
  "Pos": [10, 20, 30], # Replace with actual data
21
  "Item Code": ["155569003011", "155569003012", "155569003013"],
@@ -33,15 +29,15 @@ def extract_data(pdf_file, start_pos, end_pos):
33
 
34
  return output_path
35
 
36
- # Gradio interface
37
  interface = gr.Interface(
38
  fn=extract_data,
39
  inputs=[
40
- gr.inputs.File(label="Upload PDF File"),
41
- gr.inputs.Number(default=10, label="Start Position"),
42
- gr.inputs.Number(default=450, label="End Position")
43
  ],
44
- outputs=gr.outputs.File(label="Download Extracted Excel")
45
  )
46
 
47
  interface.launch()
 
4
 
5
  def extract_data(pdf_file, start_pos, end_pos):
6
  # Load PDF and extract specific rows
7
+ with pdfplumber.open(pdf_file.name) as pdf:
8
  # Code to extract the specified range from Position 10 to Position 450 goes here
 
 
9
  data = []
10
  for page in pdf.pages:
11
  text = page.extract_text()
12
+ # Parsing and extracting rows within the specified range goes here.
 
 
13
 
14
+ # Example extracted data (replace with actual extraction logic)
15
  extracted_data = {
16
  "Pos": [10, 20, 30], # Replace with actual data
17
  "Item Code": ["155569003011", "155569003012", "155569003013"],
 
29
 
30
  return output_path
31
 
32
+ # Updated Gradio interface using current components
33
  interface = gr.Interface(
34
  fn=extract_data,
35
  inputs=[
36
+ gr.File(type="file", label="Upload PDF File"),
37
+ gr.Number(value=10, label="Start Position"),
38
+ gr.Number(value=450, label="End Position")
39
  ],
40
+ outputs=gr.File(label="Download Extracted Excel")
41
  )
42
 
43
  interface.launch()