from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
import streamlit as st
st.title('Code Generation')

# Load the instruction dataset and the base GPT-2 model + tokenizer
huggingface_dataset_name = "red1xe/code_instructions"
dataset = load_dataset(huggingface_dataset_name)

model_name = 'gpt2'
original_model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
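# Note (assumption, not in the original listing): GPT-2 ships without a padding
# token; reusing the EOS token as pad_token is a common workaround if padded or
# batched inputs are ever passed to the model.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token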
x = st.slider('Select a sample', 0, 1000, 200)
if st.button("Show Sample"):
    index = x
    # Pull one test example; 'input' is renamed to avoid shadowing the built-in
    input_text = dataset['test'][index]['input']
    instruction = dataset['test'][index]['instruction']
    output = dataset['test'][index]['output']

    prompt = f"""
Answer the following question.
{input_text} {instruction}
Answer:
"""
    # Tokenize the prompt and generate a zero-shot completion from the base model
    inputs = tokenizer(prompt, return_tensors='pt')
    outputs = tokenizer.decode(
        original_model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=200,
            pad_token_id=tokenizer.eos_token_id,
        )[0],
        skip_special_tokens=True
    )
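    # Optional sketch (not from the original): GenerationConfig is imported above
    # but unused; it could centralize decoding settings. The sampling values below
    # are illustrative assumptions, not the app's actual configuration.
    # generation_config = GenerationConfig(
    #     max_new_tokens=200, do_sample=True, top_k=50, temperature=0.7
    # )
    # outputs = tokenizer.decode(
    #     original_model.generate(inputs["input_ids"], generation_config=generation_config)[0],
    #     skip_special_tokens=True,
    # )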
    # Display the prompt, the reference answer, and the model's zero-shot output
    dash_line = '-' * 100
    st.write(dash_line)
    st.write(f'INPUT PROMPT:\n{prompt}')
    st.write(dash_line)
    st.write(f'BASELINE HUMAN ANSWER:\n{output}\n')
    st.write(dash_line)
    st.write(f'MODEL GENERATION - ZERO SHOT:\n{outputs}')
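    # Optional sketch (assumption): the 'evaluate' import above is unused here; one
    # way it could score the generation against the reference answer with ROUGE:
    # rouge = evaluate.load('rouge')
    # scores = rouge.compute(predictions=[outputs], references=[output])
    # st.write(scores)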