| from datasets import load_dataset |
| from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer |
| import torch |
| import time |
| import evaluate |
| import pandas as pd |
| import numpy as np |
|
|
| import streamlit as st |
st.title('Code Generation')

huggingface_dataset_name = "red1xe/code_instructions"
model_name = 'google/flan-t5-base'


@st.cache_resource
def load_resources():
    """Load the dataset, base model, and tokenizer exactly once.

    Streamlit re-executes the whole script on every widget interaction;
    caching avoids re-downloading/re-loading the model on each rerun.

    Returns:
        (dataset, model, tokenizer) tuple.
    """
    ds = load_dataset(huggingface_dataset_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name, torch_dtype=torch.bfloat16
    )
    tok = AutoTokenizer.from_pretrained(model_name)
    return ds, model, tok


dataset, original_model, tokenizer = load_resources()

# Index of the test-set example to run zero-shot generation on.
x = st.slider('Select a sample', 0, 1000, 200)
if st.button("Show Sample"):
    index = x

    # 'sample_input' instead of 'input' so the builtin is not shadowed.
    sample_input = dataset['test'][index]['input']
    instruction = dataset['test'][index]['instruction']
    output = dataset['test'][index]['output']

    prompt = f"""
Answer the following question.

{sample_input} {instruction}

Answer:
"""

    # Tokenize the prompt and generate; pass the attention mask explicitly so
    # generate() does not have to guess it (avoids a warning and incorrect
    # behavior if padding is ever involved).
    inputs = tokenizer(prompt, return_tensors='pt')
    generated_ids = original_model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=200,
    )
    outputs = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Same 99-dash separator the original built with '-'.join(...).
    dash_line = '-' * 99
    st.write(dash_line)
    st.write(f'INPUT PROMPT:\n{prompt}')
    st.write(dash_line)
    st.write(f'BASELINE HUMAN SUMMARY:\n{output}\n')
    st.write(dash_line)
    st.write(f'MODEL GENERATION - ZERO SHOT:\n{outputs}')