# Hugging Face Space — Streamlit application file
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import re

# ----------------------------
# Model loading
# ----------------------------
model_name = "Waris01/google-t5-finetuning-text-summarization"


@st.cache_resource(show_spinner=False)
def _load_model(name):
    """Load tokenizer and model once per server process.

    Streamlit reruns the whole script on every user interaction;
    without caching, the model would be re-downloaded/re-instantiated
    on each rerun. ``st.cache_resource`` keeps a single shared copy.
    """
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSeq2SeqLM.from_pretrained(name)
    return tokenizer, model


# Module-level names preserved so the rest of the file keeps working.
tokenizer, model = _load_model(model_name)
# ----------------------------
# Text cleanup
# ----------------------------
def clean_text(text):
    """Normalize raw article text before summarization.

    Removes bracketed numeric citation markers (e.g. ``[12]``) and bare
    URLs first, then collapses every whitespace run to a single space
    and strips the ends. The original collapsed whitespace *before*
    deleting citations/URLs, which left stray double spaces behind;
    doing the deletions first avoids that.

    Parameters:
        text: raw input string.

    Returns:
        Cleaned, single-spaced string.
    """
    text = re.sub(r'\[[0-9]+\]', '', text)  # numeric citation markers
    text = re.sub(r'http\S+', '', text)     # bare URLs
    text = re.sub(r'\s+', ' ', text)        # collapse whitespace last
    return text.strip()
# ----------------------------
# Summary generation
# ----------------------------
def summarize(text, max_summary_length=150):
    """Generate an abstractive summary of *text*.

    Parameters:
        text: raw article text; it is cleaned via ``clean_text`` first.
        max_summary_length: upper bound (in tokens) on the generated
            summary. Defaults to 150, matching the original behavior.

    Returns:
        Decoded summary string with special tokens removed.
    """
    cleaned = clean_text(text)
    # T5-style checkpoints expect a task prefix; the input is truncated
    # to the encoder's 512-token limit.
    inputs = tokenizer("summarize: " + cleaned, return_tensors="pt",
                       truncation=True, max_length=512)
    # Pass the full encoding (input_ids AND attention_mask) so the mask
    # is not silently dropped during generation.
    summary_ids = model.generate(**inputs,
                                 max_length=max_summary_length,
                                 num_beams=2,
                                 early_stopping=True)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# ----------------------------
# Streamlit interface
# ----------------------------
st.title("🧬 Scientific Article Summarizer")
st.write("Вставьте текст статьи и получите краткую аннотацию.")

input_text = st.text_area("Введите текст статьи:", height=250)

if st.button("Суммаризировать"):
    # Guard against empty / whitespace-only submissions.
    if not input_text.strip():
        st.error("Введите текст!")
    else:
        with st.spinner("Генерация суммаризации..."):
            summary = summarize(input_text)
        st.subheader("📘 Краткое содержание:")
        st.write(summary)