Spaces:
Runtime error
Runtime error
changing some styles
Browse files
app.css
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Design tokens shared by all rules below. */
:root {
  --color-primary: #65db9d;   /* light green — accent / text highlight */
  --color-secondary: #023b1d; /* dark green — card background */

  /* Maximum width of the .class_1 card grid container. */
  --class_1-maxWidth: 1024px;
}

/* GLOBAL STYLING */
/* Card container: single-column grid, centered, capped at the max width token. */
.class_1 {
  background-color: var(--color-secondary);
  display: grid;
  grid-gap: 1rem;
  height: auto;
  width: 100%;
  max-width: var(--class_1-maxWidth);
  margin: 0 auto; /* horizontally center within the page */
}

/* Two columns once there is tablet-width space. */
@media (min-width: 768px) {
  .class_1 {
    grid-template-columns: 1fr 1fr;
  }
}

/* Utility classes mirroring the color tokens. */
.bg-primary {
  background-color: var(--color-primary);
}

.text-primary {
  color: var(--color-primary);
}
|
app.py
CHANGED
|
@@ -1,20 +1,13 @@
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
|
| 3 |
from googletrans import Translator
|
| 4 |
-
from
|
| 5 |
# from huggingface_hub import snapshot_download
|
| 6 |
|
| 7 |
page = st.sidebar.selectbox("Model ", ["Finetuned on News data", "Pretrained GPT2"])
|
| 8 |
translator = Translator()
|
| 9 |
|
| 10 |
-
def load_model(model_name):
|
| 11 |
-
with st.spinner('Waiting for the model to load.....'):
|
| 12 |
-
# snapshot_download('flax-community/Sinhala-gpt2')
|
| 13 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 14 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
|
| 15 |
-
st.success('Model loaded!!')
|
| 16 |
-
return model, tokenizer
|
| 17 |
-
|
| 18 |
seed = st.sidebar.text_input('Starting text', 'ආයුබෝවන්')
|
| 19 |
seq_num = st.sidebar.number_input('Number of sequences to generate ', 1, 20, 5)
|
| 20 |
max_len = st.sidebar.number_input('Length of a sequence ', 5, 300, 100)
|
|
@@ -37,6 +30,53 @@ def generate(model, tokenizer, seed, seq_num, max_len):
|
|
| 37 |
for beam_out in beam_outputs:
|
| 38 |
sentences.append(tokenizer.decode(beam_out, skip_special_tokens=True))
|
| 39 |
return sentences
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
if page == 'Pretrained GPT2':
|
| 42 |
st.title('Sinhala Text generation with GPT2')
|
|
@@ -51,11 +91,13 @@ if page == 'Pretrained GPT2':
|
|
| 51 |
# generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
| 52 |
# seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
|
| 53 |
seqs = generate(model, tokenizer, seed, seq_num, max_len)
|
|
|
|
| 54 |
for i, seq in enumerate(seqs):
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
st.
|
|
|
|
| 59 |
except Exception as e:
|
| 60 |
st.exception(f'Exception: {e}')
|
| 61 |
else:
|
|
@@ -72,11 +114,17 @@ else:
|
|
| 72 |
# generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
| 73 |
# seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
|
| 74 |
seqs = generate(model, tokenizer, seed, seq_num, max_len)
|
|
|
|
| 75 |
for i, seq in enumerate(seqs):
|
| 76 |
-
st.info(f'Generated sequence {i+1}:')
|
| 77 |
-
st.write(seq)
|
| 78 |
-
st.info(f'English translation (by Google Translation):')
|
| 79 |
-
st.write(translator.translate(seq, src='si', dest='en').text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
except Exception as e:
|
| 81 |
st.exception(f'Exception: {e}')
|
| 82 |
st.markdown('____________')
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
import streamlit.components.v1 as component
|
| 3 |
|
| 4 |
from googletrans import Translator
|
| 5 |
+
from model import load_model
|
| 6 |
# from huggingface_hub import snapshot_download
|
| 7 |
|
| 8 |
page = st.sidebar.selectbox("Model ", ["Finetuned on News data", "Pretrained GPT2"])
|
| 9 |
translator = Translator()
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
seed = st.sidebar.text_input('Starting text', 'ආයුබෝවන්')
|
| 12 |
seq_num = st.sidebar.number_input('Number of sequences to generate ', 1, 20, 5)
|
| 13 |
max_len = st.sidebar.number_input('Length of a sequence ', 5, 300, 100)
|
|
|
|
| 30 |
for beam_out in beam_outputs:
|
| 31 |
sentences.append(tokenizer.decode(beam_out, skip_special_tokens=True))
|
| 32 |
return sentences
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def html(body):
    """Render *body* as raw HTML inside the Streamlit app.

    Thin wrapper over ``st.markdown`` with HTML rendering enabled.
    """
    st.markdown(body=body, unsafe_allow_html=True)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def card_begin_str(Sinhala_sentence):
    """Return the opening HTML of a styled card wrapping *Sinhala_sentence*.

    The snippet embeds its own ``<style>`` block and leaves the card/container
    divs open; close them with :func:`card_end_str`.
    """
    inline_css = (
        "<style>div.card{background-color:#023b1d;border-radius: 5px;"
        "box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2);transition: 0.3s;}"
        " small{ margin: 5px;}</style>"
    )
    fragments = [
        inline_css,
        '<div class="card">',
        '<div class="container">',
        f"<small>{Sinhala_sentence}</small>",
    ]
    return "".join(fragments)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def card_end_str():
    """Return the closing tags for a card opened by :func:`card_begin_str`."""
    # Two closes: one for .container, one for .card.
    return "</div>" * 2
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def card(sinhala_sentence, english_sentence):
    """Render one card showing a Sinhala sentence and its English translation."""
    markup = (
        card_begin_str(sinhala_sentence)
        + f"<p>{english_sentence}</p>"
        + card_end_str()
    )
    html(markup)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def br(n):
    """Insert *n* HTML line breaks into the page."""
    html("<br>" * n)
|
| 59 |
+
|
| 60 |
+
def card_html(sinhala_sentence, english_sentence):
    """Render a styled article card via a Streamlit HTML component.

    Loads ``app.css`` (next to the app) and inlines it into the component so
    the card picks up the ``class_1`` / ``text-primary`` styles.

    Parameters
    ----------
    sinhala_sentence : str
        Generated Sinhala text shown as the card title.
    english_sentence : str
        Google-translated English text shown in the card body.

    Returns
    -------
    The value of ``component.html`` (rendered component placeholder).
    """
    # Fix: read with an explicit encoding — the default is platform-dependent
    # and could mis-decode the stylesheet on non-UTF-8 locales.
    with open('./app.css', encoding='utf-8') as f:
        css_file = f.read()
    return component.html(
        f"""
        <style>{css_file}</style>
        <article class="class_1 bg-white rounded-lg p-4 relative">
        <p class="font-bold items-center text-sm text-primary relative mb-1">{sinhala_sentence}</p>

        <div class="flex items-center text-white-400 mb-4">
        <i class="fab fa-google mx-2"></i>
        <small class="text-white-400">English Translations are by Google Translate</small>
        </div>

        <p class="not-italic items-center text-sm text-primary relative mb-4">
        {english_sentence}
        </p>
        </article>
        """
    )
|
| 80 |
|
| 81 |
if page == 'Pretrained GPT2':
|
| 82 |
st.title('Sinhala Text generation with GPT2')
|
|
|
|
| 91 |
# generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
| 92 |
# seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
|
| 93 |
seqs = generate(model, tokenizer, seed, seq_num, max_len)
|
| 94 |
+
st.warning("English sentences were translated by Google Translate.")
|
| 95 |
for i, seq in enumerate(seqs):
|
| 96 |
+
english_sentence = translator.translate(seq, src='si', dest='en').text
|
| 97 |
+
# card(seq, english_sentence)
|
| 98 |
+
html(card_begin_str(seq))
|
| 99 |
+
st.info(english_sentence)
|
| 100 |
+
html(card_end_str())
|
| 101 |
except Exception as e:
|
| 102 |
st.exception(f'Exception: {e}')
|
| 103 |
else:
|
|
|
|
| 114 |
# generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
| 115 |
# seqs = generator(seed, max_length=max_len, num_return_sequences=seq_num)
|
| 116 |
seqs = generate(model, tokenizer, seed, seq_num, max_len)
|
| 117 |
+
st.warning("English sentences were translated by Google Translate.")
|
| 118 |
for i, seq in enumerate(seqs):
|
| 119 |
+
# st.info(f'Generated sequence {i+1}:')
|
| 120 |
+
# st.write(seq)
|
| 121 |
+
# st.info(f'English translation (by Google Translation):')
|
| 122 |
+
# st.write(translator.translate(seq, src='si', dest='en').text)
|
| 123 |
+
english_sentence = translator.translate(seq, src='si', dest='en').text
|
| 124 |
+
# card(seq, english_sentence)
|
| 125 |
+
html(card_begin_str(seq))
|
| 126 |
+
st.info(english_sentence)
|
| 127 |
+
html(card_end_str())
|
| 128 |
except Exception as e:
|
| 129 |
st.exception(f'Exception: {e}')
|
| 130 |
st.markdown('____________')
|
model.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
|
| 3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 4 |
+
|
| 5 |
+
def load_model(model_name):
    """Load a causal-LM checkpoint and its tokenizer, with a Streamlit spinner.

    Parameters
    ----------
    model_name : str
        Hugging Face model identifier or local path.

    Returns
    -------
    tuple
        ``(model, tokenizer)`` ready for generation; the model's pad token is
        set to the tokenizer's EOS token id.
    """
    with st.spinner('Waiting for the model to load.....'):
        # snapshot_download('flax-community/Sinhala-gpt2')
        tok = AutoTokenizer.from_pretrained(model_name)
        lm = AutoModelForCausalLM.from_pretrained(
            model_name, pad_token_id=tok.eos_token_id
        )
        st.success('Model loaded!!')
        return lm, tok
|