Spaces:
Sleeping
Sleeping
Update backend/app/main.py
Browse files
- backend/app/main.py +16 -0
backend/app/main.py
CHANGED
|
@@ -104,6 +104,22 @@ async def extract_document(
|
|
| 104 |
|
| 105 |
confidence = float(extracted.get("confidence", 90))
|
| 106 |
fields = extracted.get("fields", {})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
fields_extracted = len(fields) if isinstance(fields, dict) else 0
|
| 108 |
|
| 109 |
print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")
|
|
|
|
| 104 |
|
| 105 |
confidence = float(extracted.get("confidence", 90))
|
| 106 |
fields = extracted.get("fields", {})
|
| 107 |
+
|
| 108 |
+
# Include full_text in fields if present (for frontend display)
|
| 109 |
+
full_text = extracted.get("full_text", "")
|
| 110 |
+
if full_text:
|
| 111 |
+
fields["full_text"] = full_text
|
| 112 |
+
full_text_words = len(str(full_text).split())
|
| 113 |
+
print(f"[INFO] Full text extracted: {full_text_words} words")
|
| 114 |
+
|
| 115 |
+
# Also check for pages array
|
| 116 |
+
pages_data = extracted.get("pages", [])
|
| 117 |
+
if pages_data and isinstance(pages_data, list):
|
| 118 |
+
print(f"[INFO] Extracted text from {len(pages_data)} page(s)")
|
| 119 |
+
# Add pages to fields for frontend
|
| 120 |
+
fields["pages"] = pages_data
|
| 121 |
+
|
| 122 |
+
# Count fields, including the full_text and pages entries if they were added above
|
| 123 |
fields_extracted = len(fields) if isinstance(fields, dict) else 0
|
| 124 |
|
| 125 |
print(f"[INFO] Final stats - confidence: {confidence}, fields_count: {fields_extracted}")
|