Spaces:

jithenderchoudary
/

poext

Sleeping

App Files Files Community

jithenderchoudary committed on Nov 4, 2024

Commit

47d2da2

verified ·

1 Parent(s): 05a3ebf

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -37

app.py CHANGED Viewed

@@ -4,46 +4,54 @@ import gradio as gr
 import tempfile
 def extract_po_to_excel(pdf_file):
     """Extract purchase-order lines from a PDF and write them to an Excel file.

     Every text line containing ``"Pos."`` or ``"Item Code"`` is split on
     whitespace; the first token is stored as ``Position`` and the second as
     ``Item Code``. Lines with fewer than two tokens are ignored.

     Args:
         pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
             object whose ``.name`` attribute holds the path of the PDF.
             NOTE(review): presumably the value handed over by the Gradio
             upload component; confirm which form the interface delivers.

     Returns:
         Path of a newly created temporary ``.xlsx`` file. The file is not
         deleted automatically; the caller owns it.

     Raises:
         Any exception raised while opening or reading the PDF or while
         writing the workbook is propagated unchanged.
     """
     import os

     # Accept a bare path as well as an upload object exposing ``.name``.
     if isinstance(pdf_file, (str, os.PathLike)):
         pdf_path = pdf_file
     else:
         pdf_path = pdf_file.name

     # Collect only the lines carrying one of the known keywords
     # (sample logic; adjust to the real document layout).
     matched_lines = []
     with fitz.open(pdf_path) as pdf:
         for page in pdf:
             matched_lines.extend(
                 line
                 for line in page.get_text("text").splitlines()
                 if "Pos." in line or "Item Code" in line
             )

     # Basic whitespace split; extend with further fields as needed.
     rows = []
     for line in matched_lines:
         parts = line.split()
         if len(parts) > 1:
             rows.append({"Position": parts[0], "Item Code": parts[1]})

     # Explicit columns keep the header row even when nothing matched.
     df = pd.DataFrame(rows, columns=["Position", "Item Code"])

     # Reserve a unique path, releasing the handle before pandas writes to it.
     with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
         out_path = tmp.name
     try:
         df.to_excel(out_path, index=False)
     except Exception:
         # Do not leave an orphaned temp file behind when the export fails.
         try:
             os.remove(out_path)
         except OSError:
             pass
         raise
     return out_path
 def main(pdf_file):
     """Convert the uploaded PDF and return the path of the resulting Excel file."""
     return extract_po_to_excel(pdf_file)
 # Gradio interface
 interface = gr.Interface(
@@ -57,3 +65,4 @@ interface = gr.Interface(
 if __name__ == "__main__":
     interface.launch()

 import tempfile
 def extract_po_to_excel(pdf_file):
     """Extract purchase-order lines from a PDF and write them to an Excel file.

     Every text line containing ``"Pos."`` or ``"Item Code"`` is split on
     whitespace; the first token is stored as ``Position`` and the second as
     ``Item Code``. Lines with fewer than two tokens are ignored.

     Args:
         pdf_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
             object whose ``.name`` attribute holds the path of the PDF.
             NOTE(review): presumably the value handed over by the Gradio
             upload component; confirm which form the interface delivers.

     Returns:
         Path of a newly created temporary ``.xlsx`` file (not deleted
         automatically), or ``None`` if any step failed. Failures are logged
         with their traceback rather than raised.
     """
     import logging
     import os

     out_path = None
     try:
         # Accept a bare path as well as an upload object exposing ``.name``.
         if isinstance(pdf_file, (str, os.PathLike)):
             pdf_path = pdf_file
         else:
             pdf_path = pdf_file.name

         # Collect only the lines carrying one of the known keywords
         # (sample logic; adjust to the real document layout).
         matched_lines = []
         with fitz.open(pdf_path) as pdf:
             for page in pdf:
                 matched_lines.extend(
                     line
                     for line in page.get_text("text").splitlines()
                     if "Pos." in line or "Item Code" in line
                 )

         # Basic whitespace split; extend with further fields as needed.
         rows = []
         for line in matched_lines:
             parts = line.split()
             if len(parts) > 1:
                 rows.append({"Position": parts[0], "Item Code": parts[1]})

         # Explicit columns keep the header row even when nothing matched.
         df = pd.DataFrame(rows, columns=["Position", "Item Code"])

         # Reserve a unique path, releasing the handle before pandas writes.
         with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx") as tmp:
             out_path = tmp.name
         df.to_excel(out_path, index=False)
         return out_path
     except Exception:
         # Deliberately broad: callers rely on the ``None`` sentinel instead
         # of an exception. Log the full traceback so the cause is not lost.
         logging.getLogger(__name__).exception("Failed to convert PDF to Excel")
         if out_path is not None:
             # Do not leave an orphaned temp file behind after a failed export.
             try:
                 os.remove(out_path)
             except OSError:
                 pass
         return None
 def main(pdf_file):
     """Convert the uploaded PDF; return the Excel path or an error message.

     A falsy result from ``extract_po_to_excel`` is replaced by a fixed
     error string.
     """
     # NOTE(review): if the interface output is a file component, returning a
     # message string here may be treated as a path -- confirm against the
     # interface definition.
     result = extract_po_to_excel(pdf_file)
     return result or "Error: Failed to process the PDF file."
 # Gradio interface
 interface = gr.Interface(
 if __name__ == "__main__":
     interface.launch()