Patryk Studzinski committed on
Commit
068583f
·
1 Parent(s): c50ae32

refine GBNF grammar for car advertisement; ensure compact JSON output and improve gap-item structure

Browse files

enhance infill utilities to repair truncated JSON responses; extract individual gap items from incomplete data

app/logic/answers.gbnf CHANGED
@@ -1,16 +1,15 @@
1
  # GBNF Grammar for Car Advertisement Gap Filling
2
- # Forces model to output valid JSON with gap fills
3
- # Supports 1-10 gaps with Polish characters
4
 
5
- root ::= "{" ws "\"gaps\":" ws "[" ws gap-list ws "]" ws "}"
6
 
7
- gap-list ::= gap-item (ws "," ws gap-item)*
8
 
9
- gap-item ::= "{" ws "\"index\":" ws number ws "," ws "\"choice\":" ws "\"" phrase "\"" ws "}"
10
 
11
- # Allow words with Polish characters, numbers, spaces
12
  phrase ::= word (space word){0,4}
13
  word ::= [a-zA-ZżźćńółęąśŻŹĆŃÓŁĘĄŚ0-9.,%-]+
14
  space ::= " "
15
- number ::= [0-9]+
16
- ws ::= [ \t\n]*
 
1
  # GBNF Grammar for Car Advertisement Gap Filling
2
+ # Forces model to output COMPACT valid JSON with gap fills
3
+ # No whitespace/newlines to minimize token count
4
 
5
+ root ::= "{\"gaps\":[" gap-list "]}"
6
 
7
+ gap-list ::= gap-item ("," gap-item)*
8
 
9
+ gap-item ::= "{\"index\":" number ",\"choice\":\"" phrase "\"}"
10
 
11
+ # Allow words with Polish characters, numbers, spaces (max 5 words)
12
  phrase ::= word (space word){0,4}
13
  word ::= [a-zA-ZżźćńółęąśŻŹĆŃÓŁĘĄŚ0-9.,%-]+
14
  space ::= " "
15
+ number ::= [1-9][0-9]*
 
app/logic/infill_utils.py CHANGED
@@ -159,7 +159,26 @@ def parse_infill_response(raw_output: str) -> Optional[dict]:
159
 
160
  return parsed
161
  except json.JSONDecodeError:
162
- pass # Fall through to return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  return None
165
 
 
159
 
160
  return parsed
161
  except json.JSONDecodeError:
162
+ pass # Fall through to try repair
163
+
164
+ # Attempt 3: Repair truncated JSON (grammar output cut off by max_tokens)
165
+ # Extract individual gap items even if JSON is incomplete
166
+ gap_pattern = r'\{\s*"index"\s*:\s*(\d+)\s*,\s*"choice"\s*:\s*"([^"]+)"'
167
+ gap_matches = list(re.finditer(gap_pattern, raw_output))
168
+
169
+ if gap_matches:
170
+ for match in gap_matches:
171
+ index = int(match.group(1))
172
+ choice = match.group(2).strip()
173
+ gaps_list.append({
174
+ "index": index,
175
+ "choice": choice
176
+ })
177
+
178
+ return {
179
+ "filled_text": None,
180
+ "gaps": gaps_list
181
+ }
182
 
183
  return None
184