Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -228,11 +228,16 @@ def extract_fields_from_image(image_path, progress=None):
|
|
| 228 |
progress(0.25, desc="Preparing prompt...")
|
| 229 |
|
| 230 |
# Prompt engineering: Carefully crafted prompt for multilingual form extraction
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
|
| 237 |
|
| 238 |
if progress:
|
|
|
|
| 228 |
progress(0.25, desc="Preparing prompt...")
|
| 229 |
|
| 230 |
# Prompt engineering: Carefully crafted prompt for multilingual form extraction
|
| 231 |
+
prompt = (
|
| 232 |
+
"<image_soft_token> Extract ALL possible form field labels from this image. "
|
| 233 |
+
"Return ONLY a JSON object where each key is a field name and each value is 'text'. "
|
| 234 |
+
"For Hindi/Devanagari forms, preserve the original script. "
|
| 235 |
+
"Do NOT include any introductory or concluding text. "
|
| 236 |
+
"If a field is partially visible, still include it. "
|
| 237 |
+
"Sample output: {\"ग्राम\": \"text\", \"उपकेन्द्र\": \"text\", \"आयु\": \"text\", \"लिंग\": \"text\", ...}"
|
| 238 |
+
)
|
| 239 |
+
#prompt = "<image_soft_token> Extract all the form field labels from this image. Return ONLY a JSON object where keys are field names and values are 'text'. For Hindi/Devanagari forms, preserve the original script. Do NOT include any introductory or concluding text."
|
| 240 |
+
print(f"[Prompt] Using engineered prompt: {prompt[:80]}...")
|
| 241 |
|
| 242 |
|
| 243 |
if progress:
|