Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -43,24 +43,23 @@ def process_contract(uploaded_files, object_name, manual_mappings):
|
|
| 43 |
if not uploaded_files:
|
| 44 |
return "❌ No files uploaded.", None, failed_records, "0/0"
|
| 45 |
|
| 46 |
-
# Debug: Log uploaded files and their raw data
|
| 47 |
-
print(f"Received files (bytes): {len(uploaded_files)} files")
|
| 48 |
for i, file_bytes in enumerate(uploaded_files):
|
| 49 |
-
print(f"File {i} header: {file_bytes[:5]}")
|
| 50 |
|
| 51 |
# Check for poppler-utils
|
| 52 |
if not check_poppler():
|
| 53 |
return "❌ Error: poppler-utils is not installed or not in PATH. Please install it (e.g., 'sudo apt-get install poppler-utils' on Linux).", None, failed_records, "0/0"
|
| 54 |
|
| 55 |
-
# Mock Salesforce object fields
|
| 56 |
mock_object_fields = ["Name", "Description", "Amount", "Date"] if object_name else []
|
| 57 |
total_files = len(uploaded_files)
|
| 58 |
processed_files = 0
|
| 59 |
results = []
|
| 60 |
-
ai_result = None
|
| 61 |
with tqdm(total=total_files, desc="Processing PDFs") as pbar:
|
| 62 |
for i, file_bytes in enumerate(uploaded_files):
|
| 63 |
-
# Generate a filename based on index since name is not available with type="binary"
|
| 64 |
pdf_name = f"uploaded_file_{i}.pdf"
|
| 65 |
if not is_pdf_file(file_bytes):
|
| 66 |
save_failed_record(pdf_name, object_name, "Invalid PDF content", {})
|
|
@@ -75,7 +74,7 @@ def process_contract(uploaded_files, object_name, manual_mappings):
|
|
| 75 |
save_uploaded_file_details(pdf_name, temp_path)
|
| 76 |
|
| 77 |
try:
|
| 78 |
-
|
| 79 |
text_data = extract_text_from_pdf_with_tesseract_or_layoutlm(temp_path)
|
| 80 |
if not text_data:
|
| 81 |
save_failed_record(pdf_name, object_name, "No text extracted from PDF", {})
|
|
@@ -84,7 +83,7 @@ def process_contract(uploaded_files, object_name, manual_mappings):
|
|
| 84 |
pbar.update(1)
|
| 85 |
continue
|
| 86 |
|
| 87 |
-
|
| 88 |
key_values = extract_key_values_with_layoutlm(text_data, temp_path)
|
| 89 |
ai_result = run_ai_mapping_with_layoutlm(key_values, mock_object_fields, temp_path)
|
| 90 |
if ai_result['status'] == 'failed':
|
|
@@ -94,13 +93,11 @@ def process_contract(uploaded_files, object_name, manual_mappings):
|
|
| 94 |
pbar.update(1)
|
| 95 |
continue
|
| 96 |
|
| 97 |
-
# Apply manual mappings
|
| 98 |
mappings = {k: v for k, v in ai_result['mappings'].items()}
|
| 99 |
for field, value in manual_mappings.items():
|
| 100 |
if value and field in mock_object_fields:
|
| 101 |
mappings[field] = value
|
| 102 |
|
| 103 |
-
# Mock record creation success (EPIC 5 placeholder)
|
| 104 |
results.append(f"✅ {pdf_name}: Data processed locally (Mock ID: {hash(pdf_name)})")
|
| 105 |
processed_files += 1
|
| 106 |
pbar.update(1)
|
|
@@ -115,6 +112,7 @@ def process_contract(uploaded_files, object_name, manual_mappings):
|
|
| 115 |
uploaded_file_details[pdf_name]["processed"] = True
|
| 116 |
|
| 117 |
progress = f"{processed_files}/{total_files}"
|
|
|
|
| 118 |
return "\n".join(results), ai_result, failed_records, progress
|
| 119 |
|
| 120 |
def retry_failed_record(index, object_name, manual_mappings):
|
|
@@ -134,24 +132,21 @@ def retry_failed_record(index, object_name, manual_mappings):
|
|
| 134 |
|
| 135 |
# Gradio UI
|
| 136 |
with gr.Blocks(title="Smart Contract Migrator (Local Mode)") as demo:
|
| 137 |
-
# EPIC 1: PDF Upload Interface
|
| 138 |
with gr.Row():
|
| 139 |
uploaded_files = gr.File(type="binary", file_types=["pdf"], file_count="multiple", label="Upload Contract PDFs")
|
| 140 |
upload_progress = gr.Textbox(label="Upload Progress", value="0/0", interactive=False)
|
| 141 |
|
| 142 |
-
# EPIC 2: Mock Salesforce Object Mapping (placeholder)
|
| 143 |
object_name = gr.Dropdown(choices=["Contract", "Invoice", "Agreement"], label="Select Object Type (Mock)")
|
| 144 |
|
| 145 |
def update_fields(selected_object):
|
| 146 |
if selected_object:
|
| 147 |
-
mock_fields = ["Name", "Description", "Amount", "Date"]
|
| 148 |
return gr.update(visible=True, value="\n".join(mock_fields))
|
| 149 |
return gr.update(visible=False)
|
| 150 |
|
| 151 |
object_fields_output = gr.Textbox(label="Available Fields (Mock)", interactive=False)
|
| 152 |
object_name.change(fn=update_fields, inputs=object_name, outputs=object_fields_output)
|
| 153 |
|
| 154 |
-
# EPIC 3 & 4: Auto Mapping and OCR
|
| 155 |
manual_mapping_inputs = gr.State(value={})
|
| 156 |
def update_manual_mappings(selected_object):
|
| 157 |
if selected_object:
|
|
@@ -166,7 +161,6 @@ with gr.Blocks(title="Smart Contract Migrator (Local Mode)") as demo:
|
|
| 166 |
outputs=manual_mapping_inputs
|
| 167 |
)
|
| 168 |
|
| 169 |
-
# EPIC 5: Mock Record Processing
|
| 170 |
process_button = gr.Button("Extract, Map, and Process")
|
| 171 |
status_output = gr.Textbox(label="Status", interactive=False)
|
| 172 |
ai_result_output = gr.JSON(label="AI Mapping Results (High-Confidence Mappings)")
|
|
@@ -185,7 +179,6 @@ with gr.Blocks(title="Smart Contract Migrator (Local Mode)") as demo:
|
|
| 185 |
outputs=[status_output, ai_result_output, upload_progress]
|
| 186 |
)
|
| 187 |
|
| 188 |
-
# EPIC 6: Reconciliation & Retry
|
| 189 |
with gr.Tab("Reconciliation & Retry"):
|
| 190 |
failed_records_output = gr.Textbox(label="Failed Records", interactive=False, value="No failed records.")
|
| 191 |
|
|
|
|
| 43 |
if not uploaded_files:
|
| 44 |
return "❌ No files uploaded.", None, failed_records, "0/0"
|
| 45 |
|
| 46 |
+
# Debug: Log uploaded files and their raw data
|
| 47 |
+
print(f"Received files (bytes): {len(uploaded_files)} files at {len(uploaded_files)}")
|
| 48 |
for i, file_bytes in enumerate(uploaded_files):
|
| 49 |
+
print(f"File {i} header: {file_bytes[:5]} - Starting processing")
|
| 50 |
|
| 51 |
# Check for poppler-utils
|
| 52 |
if not check_poppler():
|
| 53 |
return "❌ Error: poppler-utils is not installed or not in PATH. Please install it (e.g., 'sudo apt-get install poppler-utils' on Linux).", None, failed_records, "0/0"
|
| 54 |
|
| 55 |
+
# Mock Salesforce object fields
|
| 56 |
mock_object_fields = ["Name", "Description", "Amount", "Date"] if object_name else []
|
| 57 |
total_files = len(uploaded_files)
|
| 58 |
processed_files = 0
|
| 59 |
results = []
|
| 60 |
+
ai_result = None
|
| 61 |
with tqdm(total=total_files, desc="Processing PDFs") as pbar:
|
| 62 |
for i, file_bytes in enumerate(uploaded_files):
|
|
|
|
| 63 |
pdf_name = f"uploaded_file_{i}.pdf"
|
| 64 |
if not is_pdf_file(file_bytes):
|
| 65 |
save_failed_record(pdf_name, object_name, "Invalid PDF content", {})
|
|
|
|
| 74 |
save_uploaded_file_details(pdf_name, temp_path)
|
| 75 |
|
| 76 |
try:
|
| 77 |
+
print(f"Processing {pdf_name} - OCR stage")
|
| 78 |
text_data = extract_text_from_pdf_with_tesseract_or_layoutlm(temp_path)
|
| 79 |
if not text_data:
|
| 80 |
save_failed_record(pdf_name, object_name, "No text extracted from PDF", {})
|
|
|
|
| 83 |
pbar.update(1)
|
| 84 |
continue
|
| 85 |
|
| 86 |
+
print(f"Processing {pdf_name} - AI mapping stage")
|
| 87 |
key_values = extract_key_values_with_layoutlm(text_data, temp_path)
|
| 88 |
ai_result = run_ai_mapping_with_layoutlm(key_values, mock_object_fields, temp_path)
|
| 89 |
if ai_result['status'] == 'failed':
|
|
|
|
| 93 |
pbar.update(1)
|
| 94 |
continue
|
| 95 |
|
|
|
|
| 96 |
mappings = {k: v for k, v in ai_result['mappings'].items()}
|
| 97 |
for field, value in manual_mappings.items():
|
| 98 |
if value and field in mock_object_fields:
|
| 99 |
mappings[field] = value
|
| 100 |
|
|
|
|
| 101 |
results.append(f"✅ {pdf_name}: Data processed locally (Mock ID: {hash(pdf_name)})")
|
| 102 |
processed_files += 1
|
| 103 |
pbar.update(1)
|
|
|
|
| 112 |
uploaded_file_details[pdf_name]["processed"] = True
|
| 113 |
|
| 114 |
progress = f"{processed_files}/{total_files}"
|
| 115 |
+
print(f"Processing completed - Results: {results}, Progress: {progress}")
|
| 116 |
return "\n".join(results), ai_result, failed_records, progress
|
| 117 |
|
| 118 |
def retry_failed_record(index, object_name, manual_mappings):
|
|
|
|
| 132 |
|
| 133 |
# Gradio UI
|
| 134 |
with gr.Blocks(title="Smart Contract Migrator (Local Mode)") as demo:
|
|
|
|
| 135 |
with gr.Row():
|
| 136 |
uploaded_files = gr.File(type="binary", file_types=["pdf"], file_count="multiple", label="Upload Contract PDFs")
|
| 137 |
upload_progress = gr.Textbox(label="Upload Progress", value="0/0", interactive=False)
|
| 138 |
|
|
|
|
| 139 |
object_name = gr.Dropdown(choices=["Contract", "Invoice", "Agreement"], label="Select Object Type (Mock)")
|
| 140 |
|
| 141 |
def update_fields(selected_object):
|
| 142 |
if selected_object:
|
| 143 |
+
mock_fields = ["Name", "Description", "Amount", "Date"]
|
| 144 |
return gr.update(visible=True, value="\n".join(mock_fields))
|
| 145 |
return gr.update(visible=False)
|
| 146 |
|
| 147 |
object_fields_output = gr.Textbox(label="Available Fields (Mock)", interactive=False)
|
| 148 |
object_name.change(fn=update_fields, inputs=object_name, outputs=object_fields_output)
|
| 149 |
|
|
|
|
| 150 |
manual_mapping_inputs = gr.State(value={})
|
| 151 |
def update_manual_mappings(selected_object):
|
| 152 |
if selected_object:
|
|
|
|
| 161 |
outputs=manual_mapping_inputs
|
| 162 |
)
|
| 163 |
|
|
|
|
| 164 |
process_button = gr.Button("Extract, Map, and Process")
|
| 165 |
status_output = gr.Textbox(label="Status", interactive=False)
|
| 166 |
ai_result_output = gr.JSON(label="AI Mapping Results (High-Confidence Mappings)")
|
|
|
|
| 179 |
outputs=[status_output, ai_result_output, upload_progress]
|
| 180 |
)
|
| 181 |
|
|
|
|
| 182 |
with gr.Tab("Reconciliation & Retry"):
|
| 183 |
failed_records_output = gr.Textbox(label="Failed Records", interactive=False, value="No failed records.")
|
| 184 |
|