Vigen1 committed on
Commit
f85d0fa
·
verified ·
1 Parent(s): 14eefac

Upload test_t5.py

Browse files
Files changed (1) hide show
  1. test_t5.py +50 -0
test_t5.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.model_selection import train_test_split
2
+ from datasets import Dataset, DatasetDict, load_dataset, interleave_datasets, load_from_disk
3
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
4
+ import torch
5
+ import time
6
+ import evaluate
7
+ import pandas as pd
8
+ import numpy as np
9
# Smoke test for the text-to-SQL fine-tune: build one prompt from the CSV,
# generate with both the pre-trained checkpoint (zero-shot baseline) and the
# locally saved fine-tuned checkpoint, and print the outputs next to the
# reference SQL.


def _generate_text(model, tokenizer, encoded, max_new_tokens=200):
    """Generate from *model* for the tokenized prompt and decode it to text.

    Args:
        model: A seq2seq model exposing ``generate``.
        tokenizer: The tokenizer that produced *encoded*; used for decoding.
        encoded: Tokenizer output holding ``input_ids`` and ``attention_mask``,
            already moved to the model's device.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The first generated sequence decoded with special tokens removed.
    """
    generated = model.generate(
        input_ids=encoded["input_ids"],
        # Pass the mask explicitly instead of letting generate() infer it.
        attention_mask=encoded["attention_mask"],
        max_new_tokens=max_new_tokens,
    )
    return tokenizer.decode(generated[0], skip_special_tokens=True)


model_name = 't5-small'

# Fall back to the CPU so the script still runs on machines without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Pre-trained checkpoint, used as the zero-shot baseline.
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
original_model = original_model.to(device)

# Fine-tuned checkpoint loaded from a local directory.
finetuned_model = AutoModelForSeq2SeqLM.from_pretrained("finetuned_model_2_epoch")
finetuned_model = finetuned_model.to(device)
data = pd.read_csv("text-to-sql_from_spider.csv")

# Use the first row of the CSV as the single test example.
question = data["question"][0]
# NOTE(review): the schema is hard-coded rather than read from the same CSV
# row as the question and answer -- confirm it matches that row.
context = "CREATE TABLE table_name_11 (date VARCHAR, away_team VARCHAR)"
answer = data["sql"][0]

prompt = f"""Tables:
{context}

Question:
{question}

Answer:
"""

inputs = tokenizer(prompt, return_tensors='pt')
inputs = inputs.to(device)

# Generate with both models so each printed label matches the model that
# actually produced the text.
baseline_output = _generate_text(original_model, tokenizer, inputs)
output = _generate_text(finetuned_model, tokenizer, inputs)

dash_line = '-'*100
print(dash_line)
print(f'INPUT PROMPT:\n{prompt}')
print(dash_line)
print(f'BASELINE HUMAN ANSWER:\n{answer}\n')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{baseline_output}')
print(dash_line)
print(f'MODEL GENERATION - FINE-TUNED:\n{output}')