Spaces:

jithenderchoudary
/

pdfextract1

Sleeping

App Files Files Community

jithenderchoudary committed on Nov 7, 2024

Commit

e19bc17

verified ·

1 Parent(s): 25a7d4e

Create app.py

Browse files

Files changed (1) hide show

app.py +47 -0

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import gradio as gr
+import pdfplumber
+import pandas as pd
+def extract_data(pdf_file, start_pos, end_pos):
+    # Load PDF and extract specific rows
+    with pdfplumber.open(pdf_file) as pdf:
+        # Code to extract the specified range from Position 10 to Position 450 goes here
+        # Example processing:
+        # Initialize list to store extracted data
+        data = []
+        for page in pdf.pages:
+            text = page.extract_text()
+            # Parse the text to identify positions and extract required rows.
+            # You’ll customize this part based on your PDF structure.
+            # Append rows within the specified range to data list
+        # Example data structure (replace with actual extracted data)
+        extracted_data = {
+            "Pos": [10, 20, 30],  # Replace with actual data
+            "Item Code": ["155569003011", "155569003012", "155569003013"],
+            "Quantity": [10, 10, 10],
+            "Basic Price": [57.66, 57.66, 57.66],
+            "Sub Total": [576.60, 576.60, 576.60]
+        }
+        # Convert extracted data to a DataFrame
+        df = pd.DataFrame(extracted_data)
+        # Save DataFrame to Excel
+        output_path = "/tmp/extracted_data.xlsx"
+        df.to_excel(output_path, index=False)
+        return output_path
+# Gradio interface
+interface = gr.Interface(
+    fn=extract_data,
+    inputs=[
+        gr.inputs.File(label="Upload PDF File"),
+        gr.inputs.Number(default=10, label="Start Position"),
+        gr.inputs.Number(default=450, label="End Position")
+    ],
+    outputs=gr.outputs.File(label="Download Extracted Excel")
+)
+interface.launch()