Mirrowel committed on
Commit
5a03c26
·
1 Parent(s): f50cbff

fix(provider): 🐛 expand JSON schema validation keyword filtering and improve Gemini 3 tool call reliability

Browse files

This commit addresses issues with schema compatibility and tool call hallucination across providers:

- **Antigravity Provider**: Expands the list of incompatible JSON Schema keywords that must be filtered out for Claude via Antigravity, including validation constraints (minLength, maxLength, minimum, maximum), metadata fields (title, examples, deprecated), and JSON Schema draft 2020-12 specific keywords that cause API rejections.

- **Gemini CLI Provider**: Significantly enhances the Gemini 3 tool calling system to prevent parameter hallucination:
  - Rewrites system instruction with more explicit warnings about custom tool schemas differing from training data
  - Adds common failure pattern examples to help the model avoid typical mistakes
  - Implements strict schema enforcement via `additionalProperties: false` to prevent invalid parameter injection
  - Improves parameter signature hints in tool descriptions with recursive type formatting, enum/const support, and nested object display
  - Adds new environment variable `GEMINI_CLI_GEMINI3_STRICT_SCHEMA` to control strict schema enforcement
  - Enhances type hint formatting to show array-of-objects structures more clearly

These changes work together to reduce tool call errors by making schema constraints more explicit to both the Antigravity API and the Gemini 3 model.

src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -317,8 +317,14 @@ def _clean_claude_schema(schema: Any) -> Any:
317
  return schema
318
 
319
  # Fields not supported by Antigravity/Google's Proto-based API
 
320
  incompatible = {
321
  '$schema', 'additionalProperties', 'minItems', 'maxItems', 'pattern',
 
 
 
 
 
322
  }
323
 
324
  # Handle 'anyOf' by taking the first option (Claude doesn't support anyOf)
 
317
  return schema
318
 
319
  # Fields not supported by Antigravity/Google's Proto-based API
320
+ # Note: Claude via Antigravity rejects JSON Schema draft 2020-12 validation keywords
321
  incompatible = {
322
  '$schema', 'additionalProperties', 'minItems', 'maxItems', 'pattern',
323
+ 'minLength', 'maxLength', 'minimum', 'maximum', 'default',
324
+ 'exclusiveMinimum', 'exclusiveMaximum', 'multipleOf', 'format',
325
+ 'minProperties', 'maxProperties', 'uniqueItems', 'contentEncoding',
326
+ 'contentMediaType', 'contentSchema', 'deprecated', 'readOnly', 'writeOnly',
327
+ 'examples', '$id', '$ref', '$defs', 'definitions', 'title',
328
  }
329
 
330
  # Handle 'anyOf' by taking the first option (Claude doesn't support anyOf)
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -92,19 +92,58 @@ CACHE_DIR = Path(__file__).resolve().parent.parent.parent.parent / "cache" / "ge
92
  GEMINI3_SIGNATURE_CACHE_FILE = CACHE_DIR / "gemini3_signatures.json"
93
 
94
  # Gemini 3 tool fix system instruction (prevents hallucination)
95
- DEFAULT_GEMINI3_SYSTEM_INSTRUCTION = """CRITICAL TOOL USAGE INSTRUCTIONS:
96
- You are operating in a custom environment where tool definitions differ from your training data.
97
- You MUST follow these rules strictly:
98
-
99
- 1. DO NOT use your internal training data to guess tool parameters
100
- 2. ONLY use the exact parameter structure defined in the tool schema
101
- 3. If a tool takes a 'files' parameter, it is ALWAYS an array of objects with specific properties, NEVER a simple array of strings
102
- 4. If a tool edits code, it takes structured JSON objects with specific fields, NEVER raw diff strings or plain text
103
- 5. Parameter names in schemas are EXACT - do not substitute with similar names from your training (e.g., use 'follow_up' not 'suggested_answers')
104
- 6. Array parameters have specific item types - check the schema's 'items' field for the exact structure
105
- 7. When you see "STRICT PARAMETERS" in a tool description, those type definitions override any assumptions
106
-
107
- If you are unsure about a tool's parameters, YOU MUST read the schema definition carefully. Your training data about common tool names like 'read_file' or 'apply_diff' does NOT apply here.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  """
109
 
110
  # Gemini finish reason mapping
@@ -150,12 +189,13 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
150
  self._preserve_signatures_in_client = _env_bool("GEMINI_CLI_PRESERVE_THOUGHT_SIGNATURES", True)
151
  self._enable_signature_cache = _env_bool("GEMINI_CLI_ENABLE_SIGNATURE_CACHE", True)
152
  self._enable_gemini3_tool_fix = _env_bool("GEMINI_CLI_GEMINI3_TOOL_FIX", True)
 
153
 
154
  # Gemini 3 tool fix configuration
155
  self._gemini3_tool_prefix = os.getenv("GEMINI_CLI_GEMINI3_TOOL_PREFIX", "gemini3_")
156
  self._gemini3_description_prompt = os.getenv(
157
  "GEMINI_CLI_GEMINI3_DESCRIPTION_PROMPT",
158
- "\n\nSTRICT PARAMETERS: {params}."
159
  )
160
  self._gemini3_system_instruction = os.getenv(
161
  "GEMINI_CLI_GEMINI3_SYSTEM_INSTRUCTION",
@@ -164,7 +204,8 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
164
 
165
  lib_logger.debug(
166
  f"GeminiCli config: signatures_in_client={self._preserve_signatures_in_client}, "
167
- f"cache={self._enable_signature_cache}, gemini3_fix={self._enable_gemini3_tool_fix}"
 
168
  )
169
 
170
  # =========================================================================
@@ -1145,6 +1186,31 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1145
 
1146
  return schema
1147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1148
  def _transform_tool_schemas(self, tools: List[Dict[str, Any]], model: str = "") -> List[Dict[str, Any]]:
1149
  """
1150
  Transforms a list of OpenAI-style tool schemas into the format required by the Gemini CLI API.
@@ -1153,6 +1219,7 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1153
  For Gemini 3 models, also applies:
1154
  - Namespace prefix to tool names
1155
  - Parameter signature injection into descriptions
 
1156
  """
1157
  transformed_declarations = []
1158
  is_gemini_3 = self._is_gemini_3(model)
@@ -1180,6 +1247,10 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1180
  if name:
1181
  new_function["name"] = f"{self._gemini3_tool_prefix}{name}"
1182
 
 
 
 
 
1183
  # Inject parameter signature into description
1184
  new_function = self._inject_signature_into_description(new_function)
1185
 
@@ -1218,10 +1289,21 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1218
 
1219
  return func_decl
1220
 
1221
- def _format_type_hint(self, prop_data: Dict[str, Any]) -> str:
1222
- """Format a type hint for a property schema."""
1223
  type_hint = prop_data.get("type", "unknown")
1224
 
 
 
 
 
 
 
 
 
 
 
 
1225
  if type_hint == "array":
1226
  items = prop_data.get("items", {})
1227
  if isinstance(items, dict):
@@ -1233,7 +1315,11 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1233
  nested_list = []
1234
  for n, d in nested_props.items():
1235
  if isinstance(d, dict):
1236
- t = d.get("type", "unknown")
 
 
 
 
1237
  req = " REQUIRED" if n in nested_req else ""
1238
  nested_list.append(f"{n}: {t}{req}")
1239
  return f"ARRAY_OF_OBJECTS[{', '.join(nested_list)}]"
@@ -1241,6 +1327,18 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
1241
  return f"ARRAY_OF_{item_type.upper()}"
1242
  return "ARRAY"
1243
 
 
 
 
 
 
 
 
 
 
 
 
 
1244
  return type_hint
1245
 
1246
  def _inject_gemini3_system_instruction(self, request_payload: Dict[str, Any]) -> None:
 
92
  GEMINI3_SIGNATURE_CACHE_FILE = CACHE_DIR / "gemini3_signatures.json"
93
 
94
  # Gemini 3 tool fix system instruction (prevents hallucination)
95
+ DEFAULT_GEMINI3_SYSTEM_INSTRUCTION = """<CRITICAL_TOOL_USAGE_INSTRUCTIONS>
96
+ You are operating in a CUSTOM ENVIRONMENT where tool definitions COMPLETELY DIFFER from your training data.
97
+ VIOLATION OF THESE RULES WILL CAUSE IMMEDIATE SYSTEM FAILURE.
98
+
99
+ ## ABSOLUTE RULES - NO EXCEPTIONS
100
+
101
+ 1. **SCHEMA IS LAW**: The JSON schema in each tool definition is the ONLY source of truth.
102
+ - Your pre-trained knowledge about tools like 'read_file', 'apply_diff', 'write_to_file', 'bash', etc. is INVALID here.
103
+ - Every tool has been REDEFINED with different parameters than what you learned during training.
104
+
105
+ 2. **PARAMETER NAMES ARE EXACT**: Use ONLY the parameter names from the schema.
106
+ - WRONG: 'suggested_answers', 'file_path', 'files_to_read', 'command_to_run'
107
+ - RIGHT: Check the 'properties' field in the schema for the exact names
108
+ - The schema's 'required' array tells you which parameters are mandatory
109
+
110
+ 3. **ARRAY PARAMETERS**: When a parameter has "type": "array", check the 'items' field:
111
+ - If items.type is "object", you MUST provide an array of objects with the EXACT properties listed
112
+ - If items.type is "string", you MUST provide an array of strings
113
+ - NEVER provide a single object when an array is expected
114
+ - NEVER provide an array when a single value is expected
115
+
116
+ 4. **NESTED OBJECTS**: When items.type is "object":
117
+ - Check items.properties for the EXACT field names required
118
+ - Check items.required for which nested fields are mandatory
119
+ - Include ALL required nested fields in EVERY array element
120
+
121
+ 5. **STRICT PARAMETERS HINT**: Tool descriptions contain "STRICT PARAMETERS: ..." which lists:
122
+ - Parameter name, type, and whether REQUIRED
123
+ - For arrays of objects: the nested structure in brackets like [field: type REQUIRED, ...]
124
+ - USE THIS as your quick reference, but the JSON schema is authoritative
125
+
126
+ 6. **BEFORE EVERY TOOL CALL**:
127
+ a. Read the tool's 'parametersJsonSchema' or 'parameters' field completely
128
+ b. Identify ALL required parameters
129
+ c. Verify your parameter names match EXACTLY (case-sensitive)
130
+ d. For arrays, verify you're providing the correct item structure
131
+ e. Do NOT add parameters that don't exist in the schema
132
+
133
+ ## COMMON FAILURE PATTERNS TO AVOID
134
+
135
+ - Using 'path' when schema says 'filePath' (or vice versa)
136
+ - Using 'content' when schema says 'text' (or vice versa)
137
+ - Providing {"file": "..."} when schema wants [{"path": "...", "line_ranges": [...]}]
138
+ - Omitting required nested fields in array items
139
+ - Adding 'additionalProperties' that the schema doesn't define
140
+ - Guessing parameter names from similar tools you know from training
141
+
142
+ ## REMEMBER
143
+ Your training data about function calling is OUTDATED for this environment.
144
+ The tool names may look familiar, but the schemas are DIFFERENT.
145
+ When in doubt, RE-READ THE SCHEMA before making the call.
146
+ </CRITICAL_TOOL_USAGE_INSTRUCTIONS>
147
  """
148
 
149
  # Gemini finish reason mapping
 
189
  self._preserve_signatures_in_client = _env_bool("GEMINI_CLI_PRESERVE_THOUGHT_SIGNATURES", True)
190
  self._enable_signature_cache = _env_bool("GEMINI_CLI_ENABLE_SIGNATURE_CACHE", True)
191
  self._enable_gemini3_tool_fix = _env_bool("GEMINI_CLI_GEMINI3_TOOL_FIX", True)
192
+ self._gemini3_enforce_strict_schema = _env_bool("GEMINI_CLI_GEMINI3_STRICT_SCHEMA", True)
193
 
194
  # Gemini 3 tool fix configuration
195
  self._gemini3_tool_prefix = os.getenv("GEMINI_CLI_GEMINI3_TOOL_PREFIX", "gemini3_")
196
  self._gemini3_description_prompt = os.getenv(
197
  "GEMINI_CLI_GEMINI3_DESCRIPTION_PROMPT",
198
+ "\n\n⚠️ STRICT PARAMETERS (use EXACTLY as shown): {params}. Do NOT use parameters from your training data - use ONLY these parameter names."
199
  )
200
  self._gemini3_system_instruction = os.getenv(
201
  "GEMINI_CLI_GEMINI3_SYSTEM_INSTRUCTION",
 
204
 
205
  lib_logger.debug(
206
  f"GeminiCli config: signatures_in_client={self._preserve_signatures_in_client}, "
207
+ f"cache={self._enable_signature_cache}, gemini3_fix={self._enable_gemini3_tool_fix}, "
208
+ f"gemini3_strict_schema={self._gemini3_enforce_strict_schema}"
209
  )
210
 
211
  # =========================================================================
 
1186
 
1187
  return schema
1188
 
1189
+ def _enforce_strict_schema(self, schema: Any) -> Any:
1190
+ """
1191
+ Enforce strict JSON schema for Gemini 3 to prevent hallucinated parameters.
1192
+
1193
+ Adds 'additionalProperties: false' recursively to all object schemas,
1194
+ which tells the model it CANNOT add properties not in the schema.
1195
+ """
1196
+ if not isinstance(schema, dict):
1197
+ return schema
1198
+
1199
+ result = {}
1200
+ for key, value in schema.items():
1201
+ if isinstance(value, dict):
1202
+ result[key] = self._enforce_strict_schema(value)
1203
+ elif isinstance(value, list):
1204
+ result[key] = [self._enforce_strict_schema(item) if isinstance(item, dict) else item for item in value]
1205
+ else:
1206
+ result[key] = value
1207
+
1208
+ # Add additionalProperties: false to object schemas
1209
+ if result.get("type") == "object" and "properties" in result:
1210
+ result["additionalProperties"] = False
1211
+
1212
+ return result
1213
+
1214
  def _transform_tool_schemas(self, tools: List[Dict[str, Any]], model: str = "") -> List[Dict[str, Any]]:
1215
  """
1216
  Transforms a list of OpenAI-style tool schemas into the format required by the Gemini CLI API.
 
1219
  For Gemini 3 models, also applies:
1220
  - Namespace prefix to tool names
1221
  - Parameter signature injection into descriptions
1222
+ - Strict schema enforcement (additionalProperties: false)
1223
  """
1224
  transformed_declarations = []
1225
  is_gemini_3 = self._is_gemini_3(model)
 
1247
  if name:
1248
  new_function["name"] = f"{self._gemini3_tool_prefix}{name}"
1249
 
1250
+ # Enforce strict schema (additionalProperties: false)
1251
+ if self._gemini3_enforce_strict_schema and "parametersJsonSchema" in new_function:
1252
+ new_function["parametersJsonSchema"] = self._enforce_strict_schema(new_function["parametersJsonSchema"])
1253
+
1254
  # Inject parameter signature into description
1255
  new_function = self._inject_signature_into_description(new_function)
1256
 
 
1289
 
1290
  return func_decl
1291
 
1292
+ def _format_type_hint(self, prop_data: Dict[str, Any], depth: int = 0) -> str:
1293
+ """Format a detailed type hint for a property schema."""
1294
  type_hint = prop_data.get("type", "unknown")
1295
 
1296
+ # Handle enum values - show allowed options
1297
+ if "enum" in prop_data:
1298
+ enum_vals = prop_data["enum"]
1299
+ if len(enum_vals) <= 5:
1300
+ return f"string ENUM[{', '.join(repr(v) for v in enum_vals)}]"
1301
+ return f"string ENUM[{len(enum_vals)} options]"
1302
+
1303
+ # Handle const values
1304
+ if "const" in prop_data:
1305
+ return f"string CONST={repr(prop_data['const'])}"
1306
+
1307
  if type_hint == "array":
1308
  items = prop_data.get("items", {})
1309
  if isinstance(items, dict):
 
1315
  nested_list = []
1316
  for n, d in nested_props.items():
1317
  if isinstance(d, dict):
1318
+ # Recursively format nested types (limit depth)
1319
+ if depth < 1:
1320
+ t = self._format_type_hint(d, depth + 1)
1321
+ else:
1322
+ t = d.get("type", "unknown")
1323
  req = " REQUIRED" if n in nested_req else ""
1324
  nested_list.append(f"{n}: {t}{req}")
1325
  return f"ARRAY_OF_OBJECTS[{', '.join(nested_list)}]"
 
1327
  return f"ARRAY_OF_{item_type.upper()}"
1328
  return "ARRAY"
1329
 
1330
+ if type_hint == "object":
1331
+ nested_props = prop_data.get("properties", {})
1332
+ nested_req = prop_data.get("required", [])
1333
+ if nested_props and depth < 1:
1334
+ nested_list = []
1335
+ for n, d in nested_props.items():
1336
+ if isinstance(d, dict):
1337
+ t = d.get("type", "unknown")
1338
+ req = " REQUIRED" if n in nested_req else ""
1339
+ nested_list.append(f"{n}: {t}{req}")
1340
+ return f"object{{{', '.join(nested_list)}}}"
1341
+
1342
  return type_hint
1343
 
1344
  def _inject_gemini3_system_instruction(self, request_payload: Dict[str, Any]) -> None: