jithenderchoudary committed on
Commit
fc6645c
·
verified ·
1 Parent(s): 9c8fd6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -65
app.py CHANGED
@@ -1,14 +1,13 @@
1
  import gradio as gr
2
  import pdfplumber
3
  import pandas as pd
4
- import openpyxl # Explicitly import openpyxl for Excel support
5
  import os
6
- import pandas as pd
7
 
8
- # Define the path
9
  input_file_path = '/mnt/data/extracted_data (1).xlsx'
 
10
 
11
- # Check if the directory exists and create it if it doesn't
12
  if not os.path.exists('/mnt/data/'):
13
  os.makedirs('/mnt/data/')
14
 
@@ -20,53 +19,19 @@ if os.path.exists(input_file_path):
20
  else:
21
  print(f"File not found: {input_file_path}. Please ensure the file is in the directory.")
22
 
23
-
24
  directory_path = "/mnt/data/"
25
-
26
  if os.path.exists(directory_path):
27
- # List files if the directory exists
28
  files = os.listdir(directory_path)
29
  print("Files in /mnt/data/:", files)
30
  else:
31
- print("Directory does not exist:", directory_path)
32
- # Optionally, create the directory if needed
33
  os.makedirs(directory_path)
34
  print("Directory created:", directory_path)
35
 
36
-
37
- # Primary directory
38
- directory_path = "/mnt/data/"
39
-
40
- # Check if primary directory exists; use current directory as fallback
41
- if not os.path.exists(directory_path):
42
- print(f"Directory {directory_path} does not exist. Using current directory instead.")
43
- directory_path = "."
44
-
45
- # List files in the confirmed directory path
46
- files = os.listdir(directory_path)
47
- print("Files in directory:", files)
48
-
49
-
50
- directory_path = "/mnt/data/"
51
-
52
- try:
53
- files = os.listdir(directory_path)
54
- print("Files in /mnt/data/:", files)
55
- except FileNotFoundError:
56
- print(f"Error: The directory {directory_path} does not exist.")
57
- # Handle the error by either using another directory or creating the directory
58
- os.makedirs(directory_path)
59
- print("Created directory:", directory_path)
60
-
61
-
62
- # List all files in the /mnt/data/ directory
63
- files = os.listdir("/mnt/data/")
64
- print("Files in /mnt/data/:", files)
65
-
66
-
67
  def extract_data(pdf_file_path, start_pos, end_pos):
68
  try:
69
- # Attempt to load and process the PDF
70
  with pdfplumber.open(pdf_file_path) as pdf:
71
  data = []
72
  for page in pdf.pages:
@@ -74,15 +39,11 @@ def extract_data(pdf_file_path, start_pos, end_pos):
74
  if text is None:
75
  return "Error: Could not extract text from the PDF. Please check the file format."
76
 
77
- # Print text for debugging
78
  print("Extracted Text:", text) # Debugging line
79
 
80
- # Placeholder data extraction logic; replace with actual extraction
81
- # Append rows within start_pos to end_pos range
82
-
83
- # Example data structure to simulate output
84
  extracted_data = {
85
- "Pos": [10, 20, 30], # Replace with actual data
86
  "Item Code": ["155569003011", "155569003012", "155569003013"],
87
  "Quantity": [10, 10, 10],
88
  "Basic Price": [57.66, 57.66, 57.66],
@@ -91,11 +52,9 @@ def extract_data(pdf_file_path, start_pos, end_pos):
91
 
92
  # Convert to DataFrame and save to Excel
93
  df = pd.DataFrame(extracted_data)
94
- output_path = "/tmp/extracted_data.xlsx"
95
  df.to_excel(output_path, index=False)
96
 
97
- # Verify that file exists
98
- import os
99
  if os.path.exists(output_path):
100
  print("File saved successfully:", output_path)
101
  return output_path
@@ -103,11 +62,10 @@ def extract_data(pdf_file_path, start_pos, end_pos):
103
  return "Error: Failed to save the Excel file."
104
 
105
  except Exception as e:
106
- # Log and return any exceptions
107
  print("Error encountered:", str(e))
108
  return f"Error: {e}"
109
 
110
- # Gradio interface setup
111
  interface = gr.Interface(
112
  fn=extract_data,
113
  inputs=[
@@ -117,19 +75,15 @@ interface = gr.Interface(
117
  ],
118
  outputs=gr.File(label="Download Extracted Excel")
119
  )
120
- # Load the uploaded Excel file
121
- input_file_path = '/mnt/data/extracted_data (1).xlsx' # Update with actual path if needed
122
- output_file_path = '/mnt/data/filtered_positions_10_to_450.xlsx'
123
 
124
- # Read the Excel file into a DataFrame
125
- df = pd.read_excel(input_file_path)
126
-
127
- # Filter the DataFrame for positions between 10 and 450
128
- filtered_df = df[(df['Pos'] >= 10) & (df['Pos'] <= 450)]
129
-
130
- # Save the filtered data to a new Excel file
131
- filtered_df.to_excel(output_file_path, index=False)
132
-
133
- output_file_path # Path to the filtered Excel file
134
 
135
  interface.launch()
 
1
  import gradio as gr
2
  import pdfplumber
3
  import pandas as pd
 
4
  import os
 
5
 
6
+ # Define the path for input and output
7
  input_file_path = '/mnt/data/extracted_data (1).xlsx'
8
+ output_file_path = '/mnt/data/filtered_positions_10_to_450.xlsx'
9
 
10
+ # Ensure the directory exists
11
  if not os.path.exists('/mnt/data/'):
12
  os.makedirs('/mnt/data/')
13
 
 
19
  else:
20
  print(f"File not found: {input_file_path}. Please ensure the file is in the directory.")
21
 
22
+ # Check if directory exists and list files
23
  directory_path = "/mnt/data/"
 
24
  if os.path.exists(directory_path):
 
25
  files = os.listdir(directory_path)
26
  print("Files in /mnt/data/:", files)
27
  else:
 
 
28
  os.makedirs(directory_path)
29
  print("Directory created:", directory_path)
30
 
31
+ # Define the function to extract data from PDF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  def extract_data(pdf_file_path, start_pos, end_pos):
33
  try:
34
+ # Load and process the PDF
35
  with pdfplumber.open(pdf_file_path) as pdf:
36
  data = []
37
  for page in pdf.pages:
 
39
  if text is None:
40
  return "Error: Could not extract text from the PDF. Please check the file format."
41
 
 
42
  print("Extracted Text:", text) # Debugging line
43
 
44
+ # Example extracted data structure
 
 
 
45
  extracted_data = {
46
+ "Pos": [10, 20, 30],
47
  "Item Code": ["155569003011", "155569003012", "155569003013"],
48
  "Quantity": [10, 10, 10],
49
  "Basic Price": [57.66, 57.66, 57.66],
 
52
 
53
  # Convert to DataFrame and save to Excel
54
  df = pd.DataFrame(extracted_data)
55
+ output_path = "/mnt/data/extracted_data.xlsx"
56
  df.to_excel(output_path, index=False)
57
 
 
 
58
  if os.path.exists(output_path):
59
  print("File saved successfully:", output_path)
60
  return output_path
 
62
  return "Error: Failed to save the Excel file."
63
 
64
  except Exception as e:
 
65
  print("Error encountered:", str(e))
66
  return f"Error: {e}"
67
 
68
+ # Set up Gradio interface
69
  interface = gr.Interface(
70
  fn=extract_data,
71
  inputs=[
 
75
  ],
76
  outputs=gr.File(label="Download Extracted Excel")
77
  )
 
 
 
78
 
79
+ # Additional Excel filtering logic
80
+ if os.path.exists(input_file_path):
81
+ df = pd.read_excel(input_file_path)
82
+ # Filter for positions between 10 and 450
83
+ filtered_df = df[(df['Pos'] >= 10) & (df['Pos'] <= 450)]
84
+ filtered_df.to_excel(output_file_path, index=False)
85
+ print(f"Filtered data saved to: {output_file_path}")
86
+ else:
87
+ print(f"Input file not found: {input_file_path}. Skipping filtering.")
 
88
 
89
  interface.launch()