Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import io
|
| 3 |
import base64
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
from PIL import Image
|
| 6 |
from fastapi import FastAPI, Request
|
|
@@ -40,7 +41,38 @@ def parse_lsb_form(text):
|
|
| 40 |
if "LAPORAN SUMBER BAHAYA" in text:
|
| 41 |
result["jenis_dokumen"] = "LAPORAN SUMBER BAHAYA"
|
| 42 |
|
| 43 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
lines = text.split('\n')
|
| 45 |
|
| 46 |
# Dictionary untuk menyimpan kunci pencarian dan nama field
|
|
@@ -84,6 +116,18 @@ def parse_lsb_form(text):
|
|
| 84 |
result[current_field] += " " + line
|
| 85 |
else:
|
| 86 |
result[current_field] = line
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
return result
|
| 89 |
|
|
@@ -174,4 +218,4 @@ async def predict_route(request: Request):
|
|
| 174 |
if __name__ == "__main__":
|
| 175 |
# For local development
|
| 176 |
port = int(os.environ.get("PORT", 7860))
|
| 177 |
-
uvicorn.run(app, host="0.0.0.0", port=port)
|
|
|
|
| 1 |
import os
|
| 2 |
import io
|
| 3 |
import base64
|
| 4 |
+
import re
|
| 5 |
import numpy as np
|
| 6 |
from PIL import Image
|
| 7 |
from fastapi import FastAPI, Request
|
|
|
|
| 41 |
if "LAPORAN SUMBER BAHAYA" in text:
|
| 42 |
result["jenis_dokumen"] = "LAPORAN SUMBER BAHAYA"
|
| 43 |
|
| 44 |
+
# Pattern regex untuk menemukan field-field umum pada form LSB
|
| 45 |
+
patterns = {
|
| 46 |
+
"nama_pelapor": r"(?:NAMA\s*PELAPOR|PELAPOR)[:\s]*([^\n]+)",
|
| 47 |
+
"lokasi": r"(?:LOKASI\s*KEJADIAN|LOKASI)[:\s]*([^\n]+)",
|
| 48 |
+
"tanggal": r"(?:TANGGAL\s*/?\s*WAKTU|TANGGAL)[:\s]*([^\n]+)",
|
| 49 |
+
"posisi_jabatan": r"(?:POSISI\s*/?\s*JABATAN|JABATAN)[:\s]*([^\n]+)",
|
| 50 |
+
"jenis_pengamatan": r"(?:JENIS\s*PENGAMATAN)[:\s]*([^\n]+)",
|
| 51 |
+
"uraian_pengamatan": r"(?:URAIAN\s*PENGAMATAN)[:\s]*([^\n]+)",
|
| 52 |
+
"bahaya": r"(?:BAHAYA)[:\s]*([^\n]+)",
|
| 53 |
+
"tindakan_intervensi": r"(?:TINDAKAN\s*INTERVENSI)[:\s/]*([^\n]+)",
|
| 54 |
+
"saran_perbaikan": r"(?:SARAN\s*PERBAIKAN)[:\s]*([^\n]+)"
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
# Cari semua pola dalam teks
|
| 58 |
+
for field_name, pattern in patterns.items():
|
| 59 |
+
match = re.search(pattern, text, re.IGNORECASE)
|
| 60 |
+
if match:
|
| 61 |
+
result[field_name] = match.group(1).strip()
|
| 62 |
+
|
| 63 |
+
# Untuk jenis pengamatan, cek juga pilihan yang dicentang
|
| 64 |
+
if "jenis_pengamatan" not in result:
|
| 65 |
+
# Cek untuk Unsafe Condition
|
| 66 |
+
if re.search(r"(?:Unsafe\s*Condition|Kondisi\s*Tidak\s*Aman)[:\s]*", text, re.IGNORECASE):
|
| 67 |
+
result["jenis_pengamatan"] = "Unsafe Condition"
|
| 68 |
+
# Cek untuk Unsafe Action
|
| 69 |
+
elif re.search(r"(?:Unsafe\s*Action|Tindakan\s*Tidak\s*Aman)[:\s]*", text, re.IGNORECASE):
|
| 70 |
+
result["jenis_pengamatan"] = "Unsafe Action"
|
| 71 |
+
# Cek untuk Intervensi
|
| 72 |
+
elif re.search(r"(?:Intervensi)[:\s]*", text, re.IGNORECASE):
|
| 73 |
+
result["jenis_pengamatan"] = "Intervensi"
|
| 74 |
+
|
| 75 |
+
# Pengolahan tambahan untuk mengekstrak informasi dari teks
|
| 76 |
lines = text.split('\n')
|
| 77 |
|
| 78 |
# Dictionary untuk menyimpan kunci pencarian dan nama field
|
|
|
|
| 116 |
result[current_field] += " " + line
|
| 117 |
else:
|
| 118 |
result[current_field] = line
|
| 119 |
+
|
| 120 |
+
# Deteksi nomor LSB jika ada
|
| 121 |
+
no_lsb_match = re.search(r"No\.\s*LSB\s*:?\s*([^\n]+)", text, re.IGNORECASE)
|
| 122 |
+
if no_lsb_match:
|
| 123 |
+
result["no_lsb"] = no_lsb_match.group(1).strip()
|
| 124 |
+
|
| 125 |
+
# Format ulang data jika ditemukan format yang tidak standar
|
| 126 |
+
if "nama_pelapor" in result and len(result["nama_pelapor"]) > 50:
|
| 127 |
+
# Nama pelapor terlalu panjang, mungkin salah deteksi
|
| 128 |
+
shorter_name = re.search(r"([A-Za-z\s]{2,30})", result["nama_pelapor"])
|
| 129 |
+
if shorter_name:
|
| 130 |
+
result["nama_pelapor"] = shorter_name.group(1).strip()
|
| 131 |
|
| 132 |
return result
|
| 133 |
|
|
|
|
| 218 |
if __name__ == "__main__":
|
| 219 |
# For local development
|
| 220 |
port = int(os.environ.get("PORT", 7860))
|
| 221 |
+
uvicorn.run(app, host="0.0.0.0", port=port)
|