import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Display name -> Hugging Face model identifier
model_names = {
    "gpt2-medium": "gpt2-medium",
    "eleuther-1.3b": "EleutherAI/gpt-neo-1.3B",
}


def generate_texts(generator, input_text, **generator_args):
    """Run a text-generation pipeline on the prompt and return its output sequences."""
    return generator(input_text, **generator_args)


# st.cache(allow_output_mutation=True) keeps the heavy objects in memory across
# reruns; on newer Streamlit releases, st.cache_resource is the replacement.
@st.cache(allow_output_mutation=True)
def load_tokenizer(model_name):
    return AutoTokenizer.from_pretrained(model_name)


@st.cache(allow_output_mutation=True)
def load_model(model_name, eos_token_id):
    # Pad with the EOS token so open-ended generation does not warn about a missing pad token.
    return AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=eos_token_id)

tokenizers = {name: load_tokenizer(name) for name in model_names.values()}
pipelines = [
    pipeline(
        "text-generation",
        model=load_model(name, tokenizers[name].eos_token_id),
        tokenizer=tokenizers[name],
    )
    for name in model_names.values()
]
print("Loaded the pipelines")

default_value = "But not just any roof cleaning will do."

# Prompt input and generation controls
st.title("Text Extension or Generation")
st.write("Command + Enter for generation...")

sent = st.text_area("Text", default_value, height=150)
# generate_button = st.button("Generate")

model_index = st.sidebar.selectbox(
    "Select Model",
    range(len(model_names)),
    format_func=lambda x: list(model_names.keys())[x],
)
max_length = st.sidebar.slider("Max Length", value=100, min_value=30, max_value=256)
# Keep the lower bound strictly positive; sampling rejects temperature == 0.
temperature = st.sidebar.slider("Temperature", value=1.0, min_value=0.05, max_value=1.0, step=0.05)
num_return_sequences = st.sidebar.slider("Num Return Sequences", min_value=1, max_value=4, value=1)
num_beams = st.sidebar.slider("Num Beams", min_value=2, max_value=6, value=4)
top_k = st.sidebar.slider("Top-k", min_value=0, max_value=100, value=90)
top_p = st.sidebar.slider("Top-p", min_value=0.4, max_value=1.0, step=0.05, value=0.9)
repetition_penalty = st.sidebar.slider("Repetition-Penalty", min_value=0.45, max_value=2.0, step=0.1, value=1.2)


if len(sent) < 10:
    st.write("Input prompt is too short to generate from.")
else:
    print(sent)
    st.write(f"Generating for prompt: {sent}...")
    output_sequences = generate_texts(
        pipelines[model_index],
        sent,
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        # Beam search requires num_beams >= num_return_sequences.
        num_beams=max(num_beams, num_return_sequences),
        temperature=temperature,
        top_k=top_k,
        no_repeat_ngram_size=2,
        repetition_penalty=repetition_penalty,
        early_stopping=False,
        top_p=top_p,
    )
    st.write(output_sequences)
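# Each "text-generation" pipeline call returns a list of dicts, one per returned
# sequence, e.g. [{"generated_text": "But not just any roof cleaning will do. ..."}];
# st.write renders that structure as JSON in the app.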