pavansuresh committed on
Commit
9230bf8
·
verified ·
1 Parent(s): a57929a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -16
app.py CHANGED
@@ -43,24 +43,23 @@ def process_contract(uploaded_files, object_name, manual_mappings):
43
  if not uploaded_files:
44
  return "❌ No files uploaded.", None, failed_records, "0/0"
45
 
46
- # Debug: Log uploaded files and their raw data (first few bytes for debugging)
47
- print(f"Received files (bytes): {len(uploaded_files)} files")
48
  for i, file_bytes in enumerate(uploaded_files):
49
- print(f"File {i} header: {file_bytes[:5]}")
50
 
51
  # Check for poppler-utils
52
  if not check_poppler():
53
  return "❌ Error: poppler-utils is not installed or not in PATH. Please install it (e.g., 'sudo apt-get install poppler-utils' on Linux).", None, failed_records, "0/0"
54
 
55
- # Mock Salesforce object fields (replace with dynamic logic later)
56
  mock_object_fields = ["Name", "Description", "Amount", "Date"] if object_name else []
57
  total_files = len(uploaded_files)
58
  processed_files = 0
59
  results = []
60
- ai_result = None # Initialize to avoid UnboundLocalError
61
  with tqdm(total=total_files, desc="Processing PDFs") as pbar:
62
  for i, file_bytes in enumerate(uploaded_files):
63
- # Generate a filename based on index since name is not available with type="binary"
64
  pdf_name = f"uploaded_file_{i}.pdf"
65
  if not is_pdf_file(file_bytes):
66
  save_failed_record(pdf_name, object_name, "Invalid PDF content", {})
@@ -75,7 +74,7 @@ def process_contract(uploaded_files, object_name, manual_mappings):
75
  save_uploaded_file_details(pdf_name, temp_path)
76
 
77
  try:
78
- # EPIC 4: OCR and Data Extraction
79
  text_data = extract_text_from_pdf_with_tesseract_or_layoutlm(temp_path)
80
  if not text_data:
81
  save_failed_record(pdf_name, object_name, "No text extracted from PDF", {})
@@ -84,7 +83,7 @@ def process_contract(uploaded_files, object_name, manual_mappings):
84
  pbar.update(1)
85
  continue
86
 
87
- # EPIC 3: Auto Mapping via AI
88
  key_values = extract_key_values_with_layoutlm(text_data, temp_path)
89
  ai_result = run_ai_mapping_with_layoutlm(key_values, mock_object_fields, temp_path)
90
  if ai_result['status'] == 'failed':
@@ -94,13 +93,11 @@ def process_contract(uploaded_files, object_name, manual_mappings):
94
  pbar.update(1)
95
  continue
96
 
97
- # Apply manual mappings
98
  mappings = {k: v for k, v in ai_result['mappings'].items()}
99
  for field, value in manual_mappings.items():
100
  if value and field in mock_object_fields:
101
  mappings[field] = value
102
 
103
- # Mock record creation success (EPIC 5 placeholder)
104
  results.append(f"✅ {pdf_name}: Data processed locally (Mock ID: {hash(pdf_name)})")
105
  processed_files += 1
106
  pbar.update(1)
@@ -115,6 +112,7 @@ def process_contract(uploaded_files, object_name, manual_mappings):
115
  uploaded_file_details[pdf_name]["processed"] = True
116
 
117
  progress = f"{processed_files}/{total_files}"
 
118
  return "\n".join(results), ai_result, failed_records, progress
119
 
120
  def retry_failed_record(index, object_name, manual_mappings):
@@ -134,24 +132,21 @@ def retry_failed_record(index, object_name, manual_mappings):
134
 
135
  # Gradio UI
136
  with gr.Blocks(title="Smart Contract Migrator (Local Mode)") as demo:
137
- # EPIC 1: PDF Upload Interface
138
  with gr.Row():
139
  uploaded_files = gr.File(type="binary", file_types=["pdf"], file_count="multiple", label="Upload Contract PDFs")
140
  upload_progress = gr.Textbox(label="Upload Progress", value="0/0", interactive=False)
141
 
142
- # EPIC 2: Mock Salesforce Object Mapping (placeholder)
143
  object_name = gr.Dropdown(choices=["Contract", "Invoice", "Agreement"], label="Select Object Type (Mock)")
144
 
145
  def update_fields(selected_object):
146
  if selected_object:
147
- mock_fields = ["Name", "Description", "Amount", "Date"] # Mock fields
148
  return gr.update(visible=True, value="\n".join(mock_fields))
149
  return gr.update(visible=False)
150
 
151
  object_fields_output = gr.Textbox(label="Available Fields (Mock)", interactive=False)
152
  object_name.change(fn=update_fields, inputs=object_name, outputs=object_fields_output)
153
 
154
- # EPIC 3 & 4: Auto Mapping and OCR
155
  manual_mapping_inputs = gr.State(value={})
156
  def update_manual_mappings(selected_object):
157
  if selected_object:
@@ -166,7 +161,6 @@ with gr.Blocks(title="Smart Contract Migrator (Local Mode)") as demo:
166
  outputs=manual_mapping_inputs
167
  )
168
 
169
- # EPIC 5: Mock Record Processing
170
  process_button = gr.Button("Extract, Map, and Process")
171
  status_output = gr.Textbox(label="Status", interactive=False)
172
  ai_result_output = gr.JSON(label="AI Mapping Results (High-Confidence Mappings)")
@@ -185,7 +179,6 @@ with gr.Blocks(title="Smart Contract Migrator (Local Mode)") as demo:
185
  outputs=[status_output, ai_result_output, upload_progress]
186
  )
187
 
188
- # EPIC 6: Reconciliation & Retry
189
  with gr.Tab("Reconciliation & Retry"):
190
  failed_records_output = gr.Textbox(label="Failed Records", interactive=False, value="No failed records.")
191
 
 
43
  if not uploaded_files:
44
  return "❌ No files uploaded.", None, failed_records, "0/0"
45
 
46
+ # Debug: Log uploaded files and their raw data
47
+ print(f"Received files (bytes): {len(uploaded_files)} files at {len(uploaded_files)}")
48
  for i, file_bytes in enumerate(uploaded_files):
49
+ print(f"File {i} header: {file_bytes[:5]} - Starting processing")
50
 
51
  # Check for poppler-utils
52
  if not check_poppler():
53
  return "❌ Error: poppler-utils is not installed or not in PATH. Please install it (e.g., 'sudo apt-get install poppler-utils' on Linux).", None, failed_records, "0/0"
54
 
55
+ # Mock Salesforce object fields
56
  mock_object_fields = ["Name", "Description", "Amount", "Date"] if object_name else []
57
  total_files = len(uploaded_files)
58
  processed_files = 0
59
  results = []
60
+ ai_result = None
61
  with tqdm(total=total_files, desc="Processing PDFs") as pbar:
62
  for i, file_bytes in enumerate(uploaded_files):
 
63
  pdf_name = f"uploaded_file_{i}.pdf"
64
  if not is_pdf_file(file_bytes):
65
  save_failed_record(pdf_name, object_name, "Invalid PDF content", {})
 
74
  save_uploaded_file_details(pdf_name, temp_path)
75
 
76
  try:
77
+ print(f"Processing {pdf_name} - OCR stage")
78
  text_data = extract_text_from_pdf_with_tesseract_or_layoutlm(temp_path)
79
  if not text_data:
80
  save_failed_record(pdf_name, object_name, "No text extracted from PDF", {})
 
83
  pbar.update(1)
84
  continue
85
 
86
+ print(f"Processing {pdf_name} - AI mapping stage")
87
  key_values = extract_key_values_with_layoutlm(text_data, temp_path)
88
  ai_result = run_ai_mapping_with_layoutlm(key_values, mock_object_fields, temp_path)
89
  if ai_result['status'] == 'failed':
 
93
  pbar.update(1)
94
  continue
95
 
 
96
  mappings = {k: v for k, v in ai_result['mappings'].items()}
97
  for field, value in manual_mappings.items():
98
  if value and field in mock_object_fields:
99
  mappings[field] = value
100
 
 
101
  results.append(f"✅ {pdf_name}: Data processed locally (Mock ID: {hash(pdf_name)})")
102
  processed_files += 1
103
  pbar.update(1)
 
112
  uploaded_file_details[pdf_name]["processed"] = True
113
 
114
  progress = f"{processed_files}/{total_files}"
115
+ print(f"Processing completed - Results: {results}, Progress: {progress}")
116
  return "\n".join(results), ai_result, failed_records, progress
117
 
118
  def retry_failed_record(index, object_name, manual_mappings):
 
132
 
133
  # Gradio UI
134
  with gr.Blocks(title="Smart Contract Migrator (Local Mode)") as demo:
 
135
  with gr.Row():
136
  uploaded_files = gr.File(type="binary", file_types=["pdf"], file_count="multiple", label="Upload Contract PDFs")
137
  upload_progress = gr.Textbox(label="Upload Progress", value="0/0", interactive=False)
138
 
 
139
  object_name = gr.Dropdown(choices=["Contract", "Invoice", "Agreement"], label="Select Object Type (Mock)")
140
 
141
  def update_fields(selected_object):
142
  if selected_object:
143
+ mock_fields = ["Name", "Description", "Amount", "Date"]
144
  return gr.update(visible=True, value="\n".join(mock_fields))
145
  return gr.update(visible=False)
146
 
147
  object_fields_output = gr.Textbox(label="Available Fields (Mock)", interactive=False)
148
  object_name.change(fn=update_fields, inputs=object_name, outputs=object_fields_output)
149
 
 
150
  manual_mapping_inputs = gr.State(value={})
151
  def update_manual_mappings(selected_object):
152
  if selected_object:
 
161
  outputs=manual_mapping_inputs
162
  )
163
 
 
164
  process_button = gr.Button("Extract, Map, and Process")
165
  status_output = gr.Textbox(label="Status", interactive=False)
166
  ai_result_output = gr.JSON(label="AI Mapping Results (High-Confidence Mappings)")
 
179
  outputs=[status_output, ai_result_output, upload_progress]
180
  )
181
 
 
182
  with gr.Tab("Reconciliation & Retry"):
183
  failed_records_output = gr.Textbox(label="Failed Records", interactive=False, value="No failed records.")
184