Spaces:
Running on Zero
Running on Zero
| """Optional small-model assist: propose extra characterization inputs. | |
| This never weakens the "capture-then-assert" guarantee in generator.py -- any | |
| input tuple suggested here is executed by `generator.capture` exactly like the | |
| deterministic inputs, so the recorded expectation always comes from running the | |
| real code, never from the model. A model that proposes a redundant or useless | |
| input just yields a redundant (still-correct) test; it can never make a test | |
| green by assertion alone. | |
| """ | |
| from __future__ import annotations | |
| import ast | |
| import re | |
| from analyzer import FunctionInfo | |
| try: | |
| import spaces | |
| except ImportError: # local/dev environments without the `spaces` package | |
| class _SpacesShim: | |
| def GPU(*args, **kwargs): | |
| if args and callable(args[0]): | |
| return args[0] | |
| def decorator(fn): | |
| return fn | |
| return decorator | |
| spaces = _SpacesShim() | |
| MODEL_ID = "Qwen/Qwen2.5-Coder-0.5B-Instruct" | |
| MAX_NEW_TOKENS = 96 | |
| _model = None | |
| _tokenizer = None | |
| def _load_model(): | |
| global _model, _tokenizer | |
| if _model is None: | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| _tokenizer = AutoTokenizer.from_pretrained(MODEL_ID) | |
| _model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16) | |
| return _model, _tokenizer | |
| def suggest_extra_inputs(fn: FunctionInfo, existing: list[tuple], max_new: int = 2) -> list[tuple]: | |
| """Ask a small coder model for extra argument tuples to try for `fn`. | |
| Returns [] on any failure (no GPU, model unavailable, bad output, ...) so the | |
| deterministic pipeline never depends on this succeeding. | |
| """ | |
| try: | |
| import torch | |
| model, tokenizer = _load_model() | |
| model.to("cuda" if torch.cuda.is_available() else "cpu") | |
| except Exception: | |
| return [] | |
| prompt = _build_prompt(fn, existing, max_new) | |
| try: | |
| messages = [{"role": "user", "content": prompt}] | |
| input_ids = tokenizer.apply_chat_template( | |
| messages, add_generation_prompt=True, return_tensors="pt" | |
| ).to(model.device) | |
| output_ids = model.generate( | |
| input_ids, | |
| max_new_tokens=MAX_NEW_TOKENS, | |
| do_sample=False, | |
| pad_token_id=tokenizer.eos_token_id, | |
| ) | |
| text = tokenizer.decode(output_ids[0, input_ids.shape[1] :], skip_special_tokens=True) | |
| except Exception: | |
| return [] | |
| return _parse_tuples(text, arity=len(fn.parameters), limit=max_new) | |
| def _build_prompt(fn: FunctionInfo, existing: list[tuple], max_new: int) -> str: | |
| signature = ", ".join(fn.parameters) or "(no arguments)" | |
| existing_repr = ", ".join(repr(case) for case in existing[:4]) | |
| return ( | |
| f"Function under test: {fn.qualname}({signature})\n" | |
| f"Docstring: {fn.docstring or '(none)'}\n" | |
| f"Argument tuples already tried: [{existing_repr}]\n\n" | |
| f"Suggest {max_new} new argument tuples that probe edge cases " | |
| "(empty, zero, negative, boundary, or unusual values) and differ from " | |
| "the ones already tried. Respond with ONLY a Python list of tuples, " | |
| "e.g. [(0, 'x'), (-1, '')]. No explanation, no code block." | |
| ) | |
| def _parse_tuples(text: str, arity: int, limit: int) -> list[tuple]: | |
| """Pull up to `limit` arity-matched argument tuples out of free-form model text.""" | |
| match = re.search(r"\[.*\]", text, re.DOTALL) | |
| if not match: | |
| return [] | |
| try: | |
| parsed = ast.literal_eval(match.group(0)) | |
| except (ValueError, SyntaxError): | |
| return [] | |
| if not isinstance(parsed, list): | |
| return [] | |
| cases: list[tuple] = [] | |
| for item in parsed: | |
| if isinstance(item, tuple) and len(item) == arity: | |
| cases.append(item) | |
| elif arity == 1 and isinstance(item, (int, float, str, bool)): | |
| cases.append((item,)) | |
| if len(cases) >= limit: | |
| break | |
| return cases | |