eataly_genius / app.py
clui's picture
baseline app
0aeb96c verified
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
@st.cache_resource
def _load_lyrics_dataset():
    """Load the lyrics dataset from the Hugging Face Hub.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded object keeps one shared instance across reruns.
    """
    return load_dataset("clui/lyricsgenius")


@st.cache_resource
def _load_translator(model_name):
    """Load the tokenizer and seq2seq model for a translation checkpoint.

    Cached per ``model_name`` so each checkpoint is instantiated once per
    server process instead of once per script rerun.

    Args:
        model_name: Hugging Face Hub identifier of the checkpoint.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model


# Load the dataset from Hugging Face
dataset = _load_lyrics_dataset()

# Model used for Italian -> English translation
model_name_it_en = "Helsinki-NLP/opus-mt-it-en"
tokenizer_it_en, model_it_en = _load_translator(model_name_it_en)

# Model used for English -> Polish translation
model_name_en_pl = "sdadas/mt5-base-translator-en-pl"
tokenizer_en_pl, model_en_pl = _load_translator(model_name_en_pl)
def _translate_lines(lines, tokenizer, model):
    """Translate each non-blank line with the given seq2seq model.

    Blank (whitespace-only) lines are kept as empty strings, so the result
    has the same length and line positions as the input. Identical lines are
    translated only once per call (lyrics often repeat lines, e.g. a chorus).

    Args:
        lines: Source-language lines to translate.
        tokenizer: Tokenizer matching ``model``.
        model: Seq2seq model exposing ``generate``.

    Returns:
        A list of translated strings, one per input line.
    """
    seen = {}
    translated = []
    for line in lines:
        if not line.strip():
            translated.append("")
            continue
        if line not in seen:
            inputs = tokenizer(line, return_tensors="pt", padding=True)
            outputs = model.generate(**inputs)
            seen[line] = tokenizer.decode(outputs[0], skip_special_tokens=True)
        translated.append(seen[line])
    return translated


# Page title and artist-name input field
st.title("Neural Notes")
# Strip so whitespace-only input counts as empty and stray spaces don't
# break the substring match below.
artist_name = st.text_input("Wpisz nazwę artysty").strip()

if artist_name:
    # Case-insensitive substring match on the artist field. Only the "train"
    # split is ever read, so only that split is filtered.
    query = artist_name.lower()
    artist_songs = dataset["train"].filter(
        # `or ""` guards against rows whose artist field is null.
        lambda example: query in (example["artist"] or "").lower()
    )

    if len(artist_songs) == 0:
        st.error("Nie znaleziono piosenek dla tego artysty w datasetcie.")
    else:
        # Read the title column directly instead of materializing every row
        # (lyrics included) just to list the titles.
        song_titles = list(artist_songs["title"])
        selected_song = st.selectbox("Wybierz piosenkę", ["-- Wybierz --"] + song_titles)

        if selected_song != "-- Wybierz --":
            # Titles and rows share the same order, so the index of the first
            # matching title addresses the first song with that title.
            selected_song_data = artist_songs[song_titles.index(selected_song)]
            lyrics = selected_song_data["lyrics"]

            if lyrics:
                # Split the lyrics into lines
                lines = lyrics.split("\n")

                # Original text and both translations side by side
                col1, col2, col3 = st.columns([3, 3, 3])

                with col1:
                    st.subheader("Oryginalne linie (włoski):")
                    for line in lines:
                        st.write(line)

                with col2:
                    st.subheader("Przetłumaczone linie (angielski):")
                    # Italian -> English
                    translated_lines_en = _translate_lines(lines, tokenizer_it_en, model_it_en)
                    for line in translated_lines_en:
                        st.write(line)

                with col3:
                    st.subheader("Przetłumaczone linie (polski):")
                    # English -> Polish, using the English output as the pivot
                    translated_lines_pl = _translate_lines(
                        translated_lines_en, tokenizer_en_pl, model_en_pl
                    )
                    for line in translated_lines_pl:
                        st.write(line)
            else:
                # Missing or empty lyrics for the selected song
                st.error("Nie znaleziono tekstu piosenki.")
else:
    st.info("Proszę wprowadź nazwę artysty.")