Spaces:
Running
Running
| import argparse | |
| import os | |
| import torch | |
| from transformers import AutoTokenizer | |
| from transformers import AutoModelForSeq2SeqLM | |
| from peft import PeftModel | |
| from prompting import encode_prompt | |
def main():
    """Generate SQL for a single question from the command line and print it.

    Steps performed:
      1. Parse CLI flags (``--question`` and ``--db_id`` are required).
      2. Resolve the adapter directory and the base checkpoint.
      3. Load tokenizer, base seq2seq model and PEFT adapter, then merge the
         adapter into the base weights.
      4. Build the prompt with ``encode_prompt`` and decode greedily.
      5. Print the decoded text (special tokens stripped) to stdout.

    Environment:
        BASE_MODEL: base checkpoint name or local directory (default
            ``"t5-small"``). When it is not an existing directory and
            ``<project_root>/models/t5_spider_sft`` exists, that directory is
            used instead.
    """
    parser = argparse.ArgumentParser(
        description="Generate SQL from a question + db_id using the RLHF model."
    )
    parser.add_argument("--question", type=str, required=True)
    parser.add_argument("--db_id", type=str, required=True)
    parser.add_argument(
        "--model_dir",
        type=str,
        default=None,
        help="Defaults to outputs/rlhf_text2sql",
    )
    # NOTE(review): --use_schema and --max_schema_chars are parsed but never
    # forwarded to encode_prompt below, so they have no effect in this script.
    # Confirm whether encode_prompt should receive them.
    parser.add_argument(
        "--use_schema",
        action="store_true",
        help="Include schema in the prompt (must match training).",
    )
    parser.add_argument("--max_schema_chars", type=int, default=1500)
    parser.add_argument("--max_new_tokens", type=int, default=80)
    args = parser.parse_args()

    device = "mps" if torch.backends.mps.is_available() else "cpu"

    # Two dirname() calls: the project root is the parent of the directory
    # that contains this file.
    project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    adapter_dir = args.model_dir or os.path.join(project_root, "outputs", "rlhf_text2sql")

    base_model = os.environ.get("BASE_MODEL", "t5-small")
    fallback_base_model = os.path.join(project_root, "models", "t5_spider_sft")
    if not os.path.isdir(base_model) and os.path.isdir(fallback_base_model):
        base_model = fallback_base_model

    # When base_model is not a local directory it is passed with
    # local_files_only=True, i.e. it must already be resolvable without a
    # download.
    local_only = not os.path.isdir(base_model)

    # Prefer the tokenizer stored next to the adapter; otherwise use the base.
    tokenizer_source = adapter_dir if os.path.isdir(adapter_dir) else base_model
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_source,
        local_files_only=not os.path.isdir(tokenizer_source),
    )

    base = AutoModelForSeq2SeqLM.from_pretrained(base_model, local_files_only=local_only).to(device)
    model = PeftModel.from_pretrained(base, adapter_dir).to(device)
    # Merge adapters for faster/stabler generation.
    model = model.merge_and_unload()
    # NOTE(review): disabling the KV cache normally slows generation down;
    # kept as in the original - confirm it is intentional.
    model.config.use_cache = False

    if tokenizer.pad_token_id is None and tokenizer.eos_token_id is not None:
        tokenizer.pad_token = tokenizer.eos_token

    input_ids = encode_prompt(
        tokenizer,
        args.question,
        args.db_id,
        device=device,
        max_input_tokens=512,
    )

    # Greedy decoding. `early_stopping` is deliberately not passed: it only
    # affects beam search, so with num_beams=1 it changes nothing and merely
    # makes transformers emit a generation-config warning.
    gen_kwargs = dict(
        max_new_tokens=args.max_new_tokens,
        do_sample=False,
        num_beams=1,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

    with torch.no_grad():
        # presumably encode_prompt returns an unbatched 1-D tensor of token
        # ids; unsqueeze(0) adds the batch dimension - TODO confirm.
        out = model.generate(input_ids=input_ids.unsqueeze(0), **gen_kwargs)

    sql = tokenizer.decode(out[0], skip_special_tokens=True).strip()
    print(sql)
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()