Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import re | |
# ----------------------------
# Model settings
# ----------------------------
MODEL_NAME = "Waris01/google-t5-finetuning-text-summarization"


@st.cache_resource
def load_model(model_name):
    """Load the tokenizer and seq2seq model identified by ``model_name``.

    Streamlit re-executes the whole script on every widget interaction, so
    an uncached loader would rebuild the tokenizer and model on each button
    click. ``st.cache_resource`` keeps a single shared instance per
    ``model_name`` for the lifetime of the server process.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model


# Module-level handles used by the summarization code below.
tokenizer, model = load_model(MODEL_NAME)
# ----------------------------
# Text cleaning
# ----------------------------
_CITATION_RE = re.compile(r'\[[0-9]+\]')
_URL_RE = re.compile(r'http\S+')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text):
    """Strip citation markers and URLs from ``text`` and normalise whitespace.

    Numeric citation markers such as ``[12]`` and anything starting with
    ``http`` up to the next whitespace are removed. Whitespace is collapsed
    *after* those removals so that deleting a token in the middle of a
    sentence does not leave a double space behind.

    Args:
        text: Raw article text.

    Returns:
        The cleaned text with runs of whitespace replaced by a single space
        and no leading or trailing whitespace.
    """
    text = _CITATION_RE.sub('', text)
    text = _URL_RE.sub('', text)
    # Collapse last: the removals above can leave adjacent spaces.
    return _WHITESPACE_RE.sub(' ', text).strip()
# ----------------------------
# Summarization
# ----------------------------
def summarize(text):
    """Generate a summary of ``text`` with the module-level model.

    The text is cleaned with ``clean_text``, prefixed with ``"summarize: "``
    and tokenized with truncation at 512 tokens. The summary is generated
    with 2-beam search using ``min_length=40`` and ``max_length=150``.

    Args:
        text: Raw article text.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when nothing remains of ``text`` after cleaning.
    """
    cleaned = clean_text(text)
    if not cleaned:
        # Input held only whitespace, URLs or citation markers; running the
        # model on a bare prompt would just produce made-up text.
        return ""

    inputs = tokenizer(
        "summarize: " + cleaned,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    summary_ids = model.generate(
        inputs["input_ids"],
        # Forward the mask explicitly instead of letting generate() infer it.
        attention_mask=inputs["attention_mask"],
        max_length=150,
        min_length=40,
        num_beams=2,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# ----------------------------
# Streamlit interface
# ----------------------------
# Page header and input area.
st.title("🧬 Scientific Article Summarizer")
st.write("Вставьте текст статьи, чтобы получить краткую аннотацию.")
input_text = st.text_area("Введите текст статьи:", height=300)

# The button reports True only on the rerun triggered by the click.
clicked = st.button("Суммаризировать")

if clicked and not input_text.strip():
    # Reject blank or whitespace-only submissions.
    st.error("Введите текст статьи!")
elif clicked:
    # Show a spinner while the model runs, then render the result.
    with st.spinner("Генерация суммаризации..."):
        summary = summarize(input_text)
    st.subheader("📘 Краткое содержание:")
    st.write(summary)