jithenderchoudary committed on
Commit
47d2da2
·
verified ·
1 Parent(s): 05a3ebf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -37
app.py CHANGED
@@ -4,46 +4,54 @@ import gradio as gr
4
  import tempfile
5
 
6
  def extract_po_to_excel(pdf_file):
7
- # Load PDF and extract text
8
- with fitz.open(pdf_file.name) as pdf:
9
- data = []
10
- for page_num in range(pdf.page_count):
11
- page = pdf[page_num]
12
- text = page.get_text("text")
13
-
14
- # Simple example of extraction (customize parsing as needed)
15
- lines = text.splitlines()
16
- for line in lines:
17
- # Only extract lines with known keywords (sample logic; adjust as necessary)
18
- if "Pos." in line or "Item Code" in line:
19
- data.append(line)
20
-
21
- # Example structure, parse `data` into structured format
22
- structured_data = []
23
- for line in data:
24
- # Custom parsing logic goes here; here's a basic split by spaces
25
- # Adjust parsing to match your actual data needs
26
- parts = line.split()
27
- if len(parts) > 1:
28
- structured_data.append({
29
- "Position": parts[0],
30
- "Item Code": parts[1],
31
- # Extract other fields as needed
32
- })
33
-
34
- # Create DataFrame and export to Excel
35
- df = pd.DataFrame(structured_data)
36
-
37
- # Save to temporary file
38
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
39
- df.to_excel(temp_file.name, index=False)
40
- temp_file.close()
41
-
42
- return temp_file.name
 
 
 
 
 
43
 
44
  def main(pdf_file):
45
  excel_file_path = extract_po_to_excel(pdf_file)
46
- return excel_file_path
 
 
 
47
 
48
  # Gradio interface
49
  interface = gr.Interface(
@@ -57,3 +65,4 @@ interface = gr.Interface(
57
  if __name__ == "__main__":
58
  interface.launch()
59
 
 
 
4
  import tempfile
5
 
6
  def extract_po_to_excel(pdf_file):
7
+ try:
8
+ # Load PDF and extract text
9
+ with fitz.open(pdf_file.name) as pdf:
10
+ data = []
11
+ for page_num in range(pdf.page_count):
12
+ page = pdf[page_num]
13
+ text = page.get_text("text")
14
+
15
+ # Simple example of extraction (customize parsing as needed)
16
+ lines = text.splitlines()
17
+ for line in lines:
18
+ # Only extract lines with known keywords (sample logic; adjust as necessary)
19
+ if "Pos." in line or "Item Code" in line:
20
+ data.append(line)
21
+
22
+ # Example structure, parse `data` into structured format
23
+ structured_data = []
24
+ for line in data:
25
+ # Custom parsing logic goes here; here's a basic split by spaces
26
+ # Adjust parsing to match your actual data needs
27
+ parts = line.split()
28
+ if len(parts) > 1:
29
+ structured_data.append({
30
+ "Position": parts[0],
31
+ "Item Code": parts[1],
32
+ # Extract other fields as needed
33
+ })
34
+
35
+ # Create DataFrame and export to Excel
36
+ df = pd.DataFrame(structured_data)
37
+
38
+ # Save to temporary file
39
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx")
40
+ df.to_excel(temp_file.name, index=False)
41
+ temp_file.close()
42
+
43
+ return temp_file.name
44
+
45
+ except Exception as e:
46
+ print(f"Error: {e}")
47
+ return None
48
 
49
  def main(pdf_file):
50
  excel_file_path = extract_po_to_excel(pdf_file)
51
+ if excel_file_path:
52
+ return excel_file_path
53
+ else:
54
+ return "Error: Failed to process the PDF file."
55
 
56
  # Gradio interface
57
  interface = gr.Interface(
 
65
  if __name__ == "__main__":
66
  interface.launch()
67
 
68
+