Spaces:
Sleeping
Sleeping
Patryk Studzinski committed on
Commit ·
068583f
1
Parent(s): c50ae32
refine GBNF grammar for car advertisement; ensure compact JSON output and improve gap-item structure
Browse files
enhance infill utilities to repair truncated JSON responses; extract individual gap items from incomplete data
- app/logic/answers.gbnf +7 -8
- app/logic/infill_utils.py +20 -1
app/logic/answers.gbnf
CHANGED
|
@@ -1,16 +1,15 @@
|
|
| 1 |
# GBNF Grammar for Car Advertisement Gap Filling
|
| 2 |
-
# Forces model to output valid JSON with gap fills
|
| 3 |
-
#
|
| 4 |
|
| 5 |
-
root ::= "{
|
| 6 |
|
| 7 |
-
gap-list ::= gap-item (
|
| 8 |
|
| 9 |
-
gap-item ::= "{
|
| 10 |
|
| 11 |
-
# Allow words with Polish characters, numbers, spaces
|
| 12 |
phrase ::= word (space word){0,4}
|
| 13 |
word ::= [a-zA-ZżźćńółęąśŻŹĆŃÓŁĘĄŚ0-9.,%-]+
|
| 14 |
space ::= " "
|
| 15 |
-
number ::= [0-9]
|
| 16 |
-
ws ::= [ \t\n]*
|
|
|
|
| 1 |
# GBNF Grammar for Car Advertisement Gap Filling
|
| 2 |
+
# Forces model to output COMPACT valid JSON with gap fills
|
| 3 |
+
# No whitespace/newlines to minimize token count
|
| 4 |
|
| 5 |
+
root ::= "{\"gaps\":[" gap-list "]}"
|
| 6 |
|
| 7 |
+
gap-list ::= gap-item ("," gap-item)*
|
| 8 |
|
| 9 |
+
gap-item ::= "{\"index\":" number ",\"choice\":\"" phrase "\"}"
|
| 10 |
|
| 11 |
+
# Allow words with Polish characters, numbers, spaces (max 5 words)
|
| 12 |
phrase ::= word (space word){0,4}
|
| 13 |
word ::= [a-zA-ZżźćńółęąśŻŹĆŃÓŁĘĄŚ0-9.,%-]+
|
| 14 |
space ::= " "
|
| 15 |
+
number ::= [1-9][0-9]*
|
|
|
app/logic/infill_utils.py
CHANGED
|
@@ -159,7 +159,26 @@ def parse_infill_response(raw_output: str) -> Optional[dict]:
|
|
| 159 |
|
| 160 |
return parsed
|
| 161 |
except json.JSONDecodeError:
|
| 162 |
-
pass # Fall through to
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
return None
|
| 165 |
|
|
|
|
| 159 |
|
| 160 |
return parsed
|
| 161 |
except json.JSONDecodeError:
|
| 162 |
+
pass # Fall through to try repair
|
| 163 |
+
|
| 164 |
+
# Attempt 3: Repair truncated JSON (grammar output cut off by max_tokens)
|
| 165 |
+
# Extract individual gap items even if JSON is incomplete
|
| 166 |
+
gap_pattern = r'\{\s*"index"\s*:\s*(\d+)\s*,\s*"choice"\s*:\s*"([^"]+)"'
|
| 167 |
+
gap_matches = list(re.finditer(gap_pattern, raw_output))
|
| 168 |
+
|
| 169 |
+
if gap_matches:
|
| 170 |
+
for match in gap_matches:
|
| 171 |
+
index = int(match.group(1))
|
| 172 |
+
choice = match.group(2).strip()
|
| 173 |
+
gaps_list.append({
|
| 174 |
+
"index": index,
|
| 175 |
+
"choice": choice
|
| 176 |
+
})
|
| 177 |
+
|
| 178 |
+
return {
|
| 179 |
+
"filled_text": None,
|
| 180 |
+
"gaps": gaps_list
|
| 181 |
+
}
|
| 182 |
|
| 183 |
return None
|
| 184 |
|