Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -49,8 +49,10 @@ def extract_text_from_pdf(pdf_bytes):
|
|
| 49 |
text = ""
|
| 50 |
for img in images:
|
| 51 |
text += pytesseract.image_to_string(img) + "\n"
|
|
|
|
| 52 |
return text
|
| 53 |
except Exception as e:
|
|
|
|
| 54 |
return f"Error extracting text: {str(e)}"
|
| 55 |
finally:
|
| 56 |
if os.path.exists(temp_path):
|
|
@@ -130,10 +132,10 @@ with gr.Blocks(title="Contract Intelligence App") as demo:
|
|
| 130 |
results = []
|
| 131 |
all_data = {}
|
| 132 |
all_risks = []
|
| 133 |
-
for file in files:
|
| 134 |
status, data, risks, _ = process_contract(file, obj_type)
|
| 135 |
-
results.append(f"{status} - File: {
|
| 136 |
-
all_data.update({f"File_{
|
| 137 |
all_risks.extend(risks)
|
| 138 |
progress = f"{len(files)}/{len(files)}"
|
| 139 |
return "\n".join(results), all_data, "\n".join(all_risks) if all_risks else "No risks detected", gr.update(value=progress)
|
|
|
|
| 49 |
text = ""
|
| 50 |
for img in images:
|
| 51 |
text += pytesseract.image_to_string(img) + "\n"
|
| 52 |
+
print(f"OCR completed - Extracted text length: {len(text)}")
|
| 53 |
return text
|
| 54 |
except Exception as e:
|
| 55 |
+
print(f"OCR failed: {str(e)}")
|
| 56 |
return f"Error extracting text: {str(e)}"
|
| 57 |
finally:
|
| 58 |
if os.path.exists(temp_path):
|
|
|
|
| 132 |
results = []
|
| 133 |
all_data = {}
|
| 134 |
all_risks = []
|
| 135 |
+
for i, file in enumerate(files):
|
| 136 |
status, data, risks, _ = process_contract(file, obj_type)
|
| 137 |
+
results.append(f"{status} - File: File_{i}")
|
| 138 |
+
all_data.update({f"File_{i}": data})
|
| 139 |
all_risks.extend(risks)
|
| 140 |
progress = f"{len(files)}/{len(files)}"
|
| 141 |
return "\n".join(results), all_data, "\n".join(all_risks) if all_risks else "No risks detected", gr.update(value=progress)
|