Spaces:
Sleeping
Sleeping
Commit ·
6036a45
1
Parent(s): d007853
3.53
Browse files- app.py +405 -302
- requirements.txt +1 -3
app.py
CHANGED
|
@@ -29,6 +29,40 @@ from transformers import (
|
|
| 29 |
AutoModelForCausalLM # 4 Qwen
|
| 30 |
)
|
| 31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
class FallbackLLMSystem:
|
| 33 |
def __init__(self):
|
| 34 |
"""Initialize fallback models for event detection and reasoning"""
|
|
@@ -249,98 +283,197 @@ class QwenSystem:
|
|
| 249 |
raise
|
| 250 |
|
| 251 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
class TranslationSystem:
|
| 253 |
-
def __init__(self
|
| 254 |
-
"""
|
| 255 |
-
Initialize translation system using Helsinki NLP model.
|
| 256 |
-
"""
|
| 257 |
try:
|
| 258 |
-
self.translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ru-en")
|
| 259 |
-
|
| 260 |
except Exception as e:
|
| 261 |
-
st.error(f"Error initializing
|
| 262 |
raise
|
| 263 |
|
| 264 |
def translate_text(self, text):
|
| 265 |
-
"""
|
| 266 |
-
Translate single text using Helsinki NLP model with chunking for long texts.
|
| 267 |
-
"""
|
| 268 |
if pd.isna(text) or not isinstance(text, str) or not text.strip():
|
| 269 |
-
return text
|
| 270 |
|
| 271 |
text = str(text).strip()
|
| 272 |
if not text:
|
| 273 |
-
return
|
| 274 |
|
| 275 |
try:
|
| 276 |
-
|
| 277 |
-
max_chunk_size = 512 # Standard transformer length
|
| 278 |
-
|
| 279 |
-
if len(text.split()) <= max_chunk_size:
|
| 280 |
-
# Direct translation for short texts
|
| 281 |
-
result = self.translator(text, max_length=512)
|
| 282 |
-
return result[0]['translation_text']
|
| 283 |
-
|
| 284 |
-
# Split long text into chunks by sentences
|
| 285 |
chunks = self._split_into_chunks(text, max_chunk_size)
|
| 286 |
translated_chunks = []
|
| 287 |
|
| 288 |
for chunk in chunks:
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
return ' '.join(translated_chunks)
|
| 294 |
|
| 295 |
except Exception as e:
|
| 296 |
-
st.warning(f"Translation error: {str(e)}
|
| 297 |
return text
|
| 298 |
-
|
| 299 |
-
def _split_into_chunks(self, text, max_length):
|
| 300 |
-
"""
|
| 301 |
-
Split text into chunks by sentences, respecting max length.
|
| 302 |
-
"""
|
| 303 |
-
# Simple sentence splitting by common punctuation
|
| 304 |
-
sentences = [s.strip() for s in text.replace('!', '.').replace('?', '.').split('.') if s.strip()]
|
| 305 |
-
|
| 306 |
-
chunks = []
|
| 307 |
-
current_chunk = []
|
| 308 |
-
current_length = 0
|
| 309 |
-
|
| 310 |
-
for sentence in sentences:
|
| 311 |
-
sentence_length = len(sentence.split())
|
| 312 |
-
|
| 313 |
-
if current_length + sentence_length > max_length:
|
| 314 |
-
if current_chunk:
|
| 315 |
-
chunks.append(' '.join(current_chunk))
|
| 316 |
-
current_chunk = [sentence]
|
| 317 |
-
current_length = sentence_length
|
| 318 |
-
else:
|
| 319 |
-
current_chunk.append(sentence)
|
| 320 |
-
current_length += sentence_length
|
| 321 |
|
| 322 |
-
if current_chunk:
|
| 323 |
-
chunks.append(' '.join(current_chunk))
|
| 324 |
-
|
| 325 |
-
return chunks
|
| 326 |
-
|
| 327 |
|
| 328 |
|
| 329 |
def process_file(uploaded_file, model_choice, translation_method=None):
|
| 330 |
df = None
|
| 331 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
df = pd.read_excel(uploaded_file, sheet_name='Публикации')
|
| 333 |
llm = init_langchain_llm(model_choice)
|
| 334 |
-
# Add fallback initialization here
|
| 335 |
-
fallback_llm = FallbackLLMSystem() if model_choice != "Local-MT5" else llm
|
| 336 |
-
translator = TranslationSystem(batch_size=5)
|
| 337 |
|
| 338 |
-
#
|
| 339 |
groq_llm = ensure_groq_llm()
|
| 340 |
if groq_llm is None:
|
| 341 |
st.warning("Failed to initialize Groq LLM for impact estimation. Using fallback model.")
|
| 342 |
|
| 343 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
df['Translated'] = ''
|
| 345 |
df['Sentiment'] = ''
|
| 346 |
df['Impact'] = ''
|
|
@@ -348,104 +481,104 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 348 |
df['Event_Type'] = ''
|
| 349 |
df['Event_Summary'] = ''
|
| 350 |
|
| 351 |
-
# Validate required columns
|
| 352 |
-
required_columns = ['Объект', 'Заголовок', 'Выдержки из текста']
|
| 353 |
-
missing_columns = [col for col in required_columns if col not in df.columns]
|
| 354 |
-
if missing_columns:
|
| 355 |
-
st.error(f"Error: The following required columns are missing: {', '.join(missing_columns)}")
|
| 356 |
-
return None
|
| 357 |
-
|
| 358 |
# Deduplication
|
| 359 |
-
|
| 360 |
df = df.groupby('Объект', group_keys=False).apply(
|
| 361 |
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
|
| 362 |
).reset_index(drop=True)
|
| 363 |
-
|
| 364 |
-
remaining_news_count = len(df)
|
| 365 |
-
duplicates_removed = original_news_count - remaining_news_count
|
| 366 |
-
st.write(f"Из {original_news_count} новостных сообщений удалены {duplicates_removed} дублирующих. Осталось {remaining_news_count}.")
|
| 367 |
-
|
| 368 |
-
# Initialize progress tracking
|
| 369 |
-
progress_bar = st.progress(0)
|
| 370 |
-
status_text = st.empty()
|
| 371 |
|
| 372 |
-
# Process
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
try:
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
row['Выдержки из текста'],
|
| 392 |
row['Объект']
|
| 393 |
)
|
| 394 |
except Exception as e:
|
|
|
|
|
|
|
| 395 |
if 'rate limit' in str(e).lower():
|
| 396 |
-
st.warning("Rate limit reached. Using fallback
|
| 397 |
-
event_type, event_summary = fallback_llm.detect_events(
|
| 398 |
-
row['Выдержки из текста'],
|
| 399 |
-
row['Объект']
|
| 400 |
-
)
|
| 401 |
-
|
| 402 |
-
df.at[idx, 'Event_Type'] = event_type
|
| 403 |
-
df.at[idx, 'Event_Summary'] = event_summary
|
| 404 |
|
|
|
|
|
|
|
| 405 |
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
if 'rate limit' in str(e).lower():
|
| 418 |
-
st.warning("Groq rate limit reached. Waiting before retry...")
|
| 419 |
-
time.sleep(240) # Wait 4 minutes
|
| 420 |
-
continue
|
| 421 |
-
|
| 422 |
-
df.at[idx, 'Impact'] = impact
|
| 423 |
-
df.at[idx, 'Reasoning'] = reasoning
|
| 424 |
-
|
| 425 |
-
# Update progress
|
| 426 |
-
progress = (idx + 1) / len(df)
|
| 427 |
-
progress_bar.progress(progress)
|
| 428 |
-
status_text.text(f"Проанализировано {idx + 1} из {len(df)} новостей")
|
| 429 |
-
|
| 430 |
-
except Exception as e:
|
| 431 |
-
if 'rate limit' in str(e).lower():
|
| 432 |
-
wait_time = 240 # 4 minutes wait for rate limit
|
| 433 |
-
st.warning(f"Rate limit reached. Waiting {wait_time} seconds...")
|
| 434 |
-
time.sleep(wait_time)
|
| 435 |
-
continue
|
| 436 |
-
st.warning(f"Ошибка при обработке новости {idx + 1}: {str(e)}")
|
| 437 |
-
continue
|
| 438 |
|
| 439 |
-
|
| 440 |
-
|
|
|
|
| 441 |
|
| 442 |
-
|
| 443 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 444 |
|
| 445 |
return df
|
| 446 |
|
| 447 |
except Exception as e:
|
| 448 |
-
st.error(f"
|
| 449 |
return None
|
| 450 |
|
| 451 |
def translate_reasoning_to_russian(llm, text):
|
|
@@ -539,81 +672,33 @@ def get_mapped_sentiment(result):
|
|
| 539 |
|
| 540 |
|
| 541 |
def analyze_sentiment(text):
|
| 542 |
-
finbert_result = get_mapped_sentiment(finbert(text, truncation=True, max_length=512)[0])
|
| 543 |
-
roberta_result = get_mapped_sentiment(roberta(text, truncation=True, max_length=512)[0])
|
| 544 |
-
finbert_tone_result = get_mapped_sentiment(finbert_tone(text, truncation=True, max_length=512)[0])
|
| 545 |
-
|
| 546 |
-
# Consider sentiment negative if any model says it's negative
|
| 547 |
-
if any(result == "Negative" for result in [finbert_result, roberta_result, finbert_tone_result]):
|
| 548 |
-
return "Negative"
|
| 549 |
-
elif all(result == "Positive" for result in [finbert_result, roberta_result, finbert_tone_result]):
|
| 550 |
-
return "Positive"
|
| 551 |
-
return "Neutral"
|
| 552 |
-
|
| 553 |
-
def analyze_sentiment(text):
|
| 554 |
-
finbert_result = get_mapped_sentiment(finbert(text, truncation=True, max_length=512)[0])
|
| 555 |
-
roberta_result = get_mapped_sentiment(roberta(text, truncation=True, max_length=512)[0])
|
| 556 |
-
finbert_tone_result = get_mapped_sentiment(finbert_tone(text, truncation=True, max_length=512)[0])
|
| 557 |
-
|
| 558 |
-
# Count occurrences of each sentiment
|
| 559 |
-
sentiments = [finbert_result, roberta_result, finbert_tone_result]
|
| 560 |
-
sentiment_counts = {s: sentiments.count(s) for s in set(sentiments)}
|
| 561 |
-
|
| 562 |
-
# Return sentiment if at least two models agree, otherwise return Neutral
|
| 563 |
-
for sentiment, count in sentiment_counts.items():
|
| 564 |
-
if count >= 2:
|
| 565 |
-
return sentiment
|
| 566 |
-
return "Neutral"
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
def detect_events(llm, text, entity):
|
| 570 |
-
"""
|
| 571 |
-
Detect events in news text. This function works with both API-based LLMs and local models.
|
| 572 |
-
"""
|
| 573 |
-
# Initialize default return values
|
| 574 |
-
event_type = "Нет"
|
| 575 |
-
summary = ""
|
| 576 |
-
|
| 577 |
try:
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
response_text = response.content if hasattr(response, 'content') else str(response)
|
| 598 |
-
|
| 599 |
-
if "Тип:" in response_text and "Краткое описание:" in response_text:
|
| 600 |
-
type_part, summary_part = response_text.split("Краткое описание:")
|
| 601 |
-
event_type_temp = type_part.split("Тип:")[1].strip()
|
| 602 |
-
# Validate event type
|
| 603 |
-
valid_types = ["Отчетность", "РЦБ", "Суд", "Нет"]
|
| 604 |
-
if event_type_temp in valid_types:
|
| 605 |
-
event_type = event_type_temp
|
| 606 |
-
summary = summary_part.strip()
|
| 607 |
|
| 608 |
-
#
|
| 609 |
-
|
| 610 |
-
# Assuming llm is FallbackLLMSystem instance
|
| 611 |
-
event_type, summary = llm.detect_events(text, entity)
|
| 612 |
-
|
| 613 |
-
except Exception as e:
|
| 614 |
-
st.warning(f"Ошибка при анализе событий: {str(e)}")
|
| 615 |
|
| 616 |
-
|
|
|
|
|
|
|
|
|
|
| 617 |
|
| 618 |
def fuzzy_deduplicate(df, column, threshold=50):
|
| 619 |
seen_texts = []
|
|
@@ -852,12 +937,13 @@ def create_output_file(df, uploaded_file, llm):
|
|
| 852 |
wb.save(output)
|
| 853 |
output.seek(0)
|
| 854 |
return output
|
|
|
|
| 855 |
def main():
|
|
|
|
|
|
|
| 856 |
with st.sidebar:
|
| 857 |
-
st.title("::: AI-анализ мониторинга новостей (v.3.
|
| 858 |
-
st.subheader("по материалам СКАН-ИНТЕРФАКС
|
| 859 |
-
|
| 860 |
-
|
| 861 |
|
| 862 |
model_choice = st.radio(
|
| 863 |
"Выберите модель для анализа:",
|
|
@@ -865,53 +951,75 @@ def main():
|
|
| 865 |
key="model_selector",
|
| 866 |
help="Выберите модель для анализа новостей"
|
| 867 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 868 |
st.markdown(
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
with st.expander("ℹ️ Инструкция"):
|
| 878 |
-
st.markdown("""
|
| 879 |
-
1. Выберите модель для анализа
|
| 880 |
-
2. Выберите метод перевода
|
| 881 |
-
3. Загрузите Excel файл с новостями
|
| 882 |
-
4. Дождитесь завершения анализа
|
| 883 |
-
5. Скачайте результаты анализа в формате Excel
|
| 884 |
-
""", unsafe_allow_html=True)
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
st.markdown(
|
| 888 |
-
"""
|
| 889 |
-
<style>
|
| 890 |
-
.signature {
|
| 891 |
-
position: fixed;
|
| 892 |
-
right: 12px;
|
| 893 |
-
up: 12px;
|
| 894 |
-
font-size: 14px;
|
| 895 |
-
color: #FF0000;
|
| 896 |
-
opacity: 0.9;
|
| 897 |
-
z-index: 999;
|
| 898 |
-
}
|
| 899 |
-
</style>
|
| 900 |
-
<div class="signature">denis.pokrovsky.npff</div>
|
| 901 |
-
""",
|
| 902 |
-
unsafe_allow_html=True
|
| 903 |
)
|
| 904 |
|
|
|
|
| 905 |
st.title("Анализ мониторинга новостей")
|
| 906 |
|
|
|
|
| 907 |
if 'processed_df' not in st.session_state:
|
| 908 |
st.session_state.processed_df = None
|
|
|
|
|
|
|
|
|
|
| 909 |
|
| 910 |
-
|
| 911 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 912 |
|
| 913 |
if uploaded_file is not None and st.session_state.processed_df is None:
|
| 914 |
-
start_time = time.time()
|
|
|
|
| 915 |
try:
|
| 916 |
st.session_state.processed_df = process_file(
|
| 917 |
uploaded_file,
|
|
@@ -920,63 +1028,58 @@ def main():
|
|
| 920 |
)
|
| 921 |
|
| 922 |
if st.session_state.processed_df is not None:
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
preview_columns = ['Объект', 'Заголовок']
|
| 926 |
-
if 'Sentiment' in st.session_state.processed_df.columns:
|
| 927 |
-
preview_columns.append('Sentiment')
|
| 928 |
-
if 'Impact' in st.session_state.processed_df.columns:
|
| 929 |
-
preview_columns.append('Impact')
|
| 930 |
-
|
| 931 |
-
preview_df = st.session_state.processed_df[preview_columns].head()
|
| 932 |
-
st.dataframe(preview_df)
|
| 933 |
|
| 934 |
-
# Show
|
| 935 |
-
st.subheader("
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
| 943 |
-
st.
|
| 944 |
-
|
| 945 |
-
|
| 946 |
-
|
| 947 |
-
|
| 948 |
-
|
| 949 |
-
|
| 950 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 951 |
|
| 952 |
-
|
| 953 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 954 |
|
| 955 |
except Exception as e:
|
| 956 |
-
st.error(f"
|
| 957 |
st.session_state.processed_df = None
|
| 958 |
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
output = create_output_file(
|
| 963 |
-
st.session_state.processed_df,
|
| 964 |
-
uploaded_file,
|
| 965 |
-
init_langchain_llm(model_choice) # Initialize new LLM instance
|
| 966 |
-
)
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
end_time = time.time()
|
| 970 |
-
elapsed_time = end_time - start_time
|
| 971 |
-
formatted_time = format_elapsed_time(elapsed_time)
|
| 972 |
-
st.success(f"Обработка и анализ завершены за {formatted_time}.")
|
| 973 |
-
|
| 974 |
-
st.download_button(
|
| 975 |
-
label="Скачать результат анализа",
|
| 976 |
-
data=output,
|
| 977 |
-
file_name="результат_анализа.xlsx",
|
| 978 |
-
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
| 979 |
-
)
|
| 980 |
|
| 981 |
if __name__ == "__main__":
|
| 982 |
main()
|
|
|
|
| 29 |
AutoModelForCausalLM # 4 Qwen
|
| 30 |
)
|
| 31 |
|
| 32 |
+
from threading import Event
|
| 33 |
+
import threading
|
| 34 |
+
from queue import Queue
|
| 35 |
+
|
| 36 |
+
class ProcessControl:
    """Pause/stop flags for a long-running processing loop, built on ``threading.Event``.

    ``pause_event`` is *set* while work may proceed and *cleared* while paused.
    ``stop_event`` is set once a stop has been requested.
    """

    def __init__(self):
        self.pause_event = Event()
        self.stop_event = Event()
        self.pause_event.set()  # Start in non-paused state

    def pause(self):
        """Request a pause: subsequent ``wait_if_paused`` calls will block."""
        self.pause_event.clear()

    def resume(self):
        """Leave the paused state and release any blocked waiter."""
        self.pause_event.set()

    def stop(self):
        """Request a stop and release a paused waiter so it can observe the stop."""
        self.stop_event.set()
        self.pause_event.set()  # Ensure not stuck in pause

    def reset(self):
        """Clear both the stop request and any pause, returning to the initial state."""
        self.stop_event.clear()
        self.pause_event.set()

    def is_paused(self):
        """Return True while a pause is in effect."""
        return not self.pause_event.is_set()

    def is_stopped(self):
        """Return True once a stop has been requested (until ``reset``)."""
        return self.stop_event.is_set()

    def wait_if_paused(self, timeout=None):
        """Block while paused.

        Args:
            timeout: Optional maximum number of seconds to wait. ``None``
                (the default) waits until ``resume`` or ``stop`` is called,
                which is the original behaviour.

        Returns:
            True if processing may continue, False if the timeout expired
            while still paused.
        """
        return self.pause_event.wait(timeout)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
class FallbackLLMSystem:
|
| 67 |
def __init__(self):
|
| 68 |
"""Initialize fallback models for event detection and reasoning"""
|
|
|
|
| 283 |
raise
|
| 284 |
|
| 285 |
|
| 286 |
+
class ProcessingUI:
    """Streamlit widgets for one processing run.

    Creates the pause and stop buttons, a progress bar, a status line and two
    alert placeholders (negative news, detected events). The run-control
    object and both placeholders are stored in ``st.session_state`` under the
    keys ``control``, ``negative_container`` and ``events_container``.
    """

    def __init__(self):
        if 'control' not in st.session_state:
            st.session_state.control = ProcessControl()
        if 'negative_container' not in st.session_state:
            st.session_state.negative_container = st.empty()
        if 'events_container' not in st.session_state:
            st.session_state.events_container = st.empty()

        control = st.session_state.control

        # Create control buttons
        col1, col2 = st.columns(2)
        with col1:
            label = "▶️ Resume" if control.is_paused() else "⏸️ Pause/Resume"
            if st.button(label, key="pause_button"):
                # The same button toggles between paused and running.
                if control.is_paused():
                    control.resume()
                else:
                    control.pause()

        with col2:
            if st.button("⏹️ Stop", key="stop_button"):
                control.stop()

        self.progress_bar = st.progress(0)
        self.status = st.empty()

    @staticmethod
    def _progress_fraction(current, total):
        """Return ``current / total`` clamped to [0.0, 1.0]; 0.0 when ``total`` is not positive."""
        if total <= 0:
            return 0.0
        return min(max(current / total, 0.0), 1.0)

    @staticmethod
    def _escape(value):
        """Return ``value`` as text that is safe to embed in HTML markup."""
        import html
        return html.escape(str(value))

    def update_progress(self, current, total):
        """Move the progress bar to ``current`` of ``total`` and refresh the status line.

        An empty data set (``total == 0``) shows an empty bar instead of
        raising ZeroDivisionError, and the fraction never exceeds 1.0.
        """
        self.progress_bar.progress(self._progress_fraction(current, total))
        self.status.text(f"Processing {current} of {total} items...")

    def show_negative(self, entity, headline, analysis, impact=None):
        """Render a negative-news alert in the ``negative_container`` placeholder.

        All arguments originate from uploaded data or model output, so they are
        HTML-escaped before being placed into ``unsafe_allow_html`` markup.
        ``impact`` is shown only when truthy.
        """
        impact_html = (
            f"<strong>Impact:</strong> {self._escape(impact)}<br>" if impact else ""
        )
        # Built without line breaks or indentation so Markdown cannot treat
        # indented HTML lines as a code block.
        markup = (
            "<div style='background-color: #ffebee; padding: 10px; "
            "border-radius: 5px; margin: 5px 0;'>"
            "<strong style='color: #d32f2f;'>⚠️ Negative Alert:</strong><br>"
            f"<strong>Entity:</strong> {self._escape(entity)}<br>"
            f"<strong>News:</strong> {self._escape(headline)}<br>"
            f"<strong>Analysis:</strong> {self._escape(analysis)}<br>"
            f"{impact_html}"
            "</div>"
        )
        with st.session_state.negative_container:
            st.markdown(markup, unsafe_allow_html=True)

    def show_event(self, entity, event_type, headline):
        """Render a detected-event alert in the ``events_container`` placeholder.

        All arguments are HTML-escaped before being placed into the markup.
        """
        markup = (
            "<div style='background-color: #e3f2fd; padding: 10px; "
            "border-radius: 5px; margin: 5px 0;'>"
            "<strong style='color: #1976d2;'>🔔 Event Detected:</strong><br>"
            f"<strong>Entity:</strong> {self._escape(entity)}<br>"
            f"<strong>Type:</strong> {self._escape(event_type)}<br>"
            f"<strong>News:</strong> {self._escape(headline)}"
            "</div>"
        )
        with st.session_state.events_container:
            st.markdown(markup, unsafe_allow_html=True)
|
| 338 |
+
|
| 339 |
+
class EventDetectionSystem:
    """Keyword-based event tagger gated by classifier confidence.

    Two text-classification pipelines (``ProsusAI/finbert`` and
    ``yiyanghkust/finbert-tone``) supply a confidence score; the event type
    itself is chosen by keyword rules applied to the lower-cased text.
    """

    # Minimum top-label score (from either model) required before any rule is applied.
    _CONFIDENCE_THRESHOLD = 0.6

    # (event type, summary label, regex) in priority order: the first match wins.
    # Russian stems keep their substring semantics, except the short stems
    # 'суд' and 'иск', which are anchored to a word start so that words such as
    # "государство" or "риск" no longer count as court events.
    # English stems are included because the caller visible in this file passes
    # the translated text.
    _EVENT_RULES = (
        (
            "Отчетность",
            "Финансовая отчетность",
            r"отчет|выручка|прибыль|ebitda|\b(?:financial statement|earnings|revenue|profit)",
        ),
        (
            "РЦБ",
            "Событие РЦБ",
            r"облигаци|купон|дефолт|реструктуризац|\b(?:bond|coupon|default|restructur)",
        ),
        (
            "Суд",
            "Судебное разбирательство",
            r"\b(?:суд|иск)|арбитраж|\b(?:court|lawsuit|litigation|arbitration)",
        ),
    )

    def __init__(self):
        """Load both classification pipelines; report and re-raise on failure."""
        try:
            # Initialize models with specific labels
            # NOTE(review): return_all_scores appears to be deprecated in newer
            # transformers releases in favour of top_k; confirm that the output
            # shape stays [[{label, score}, ...]] before changing it.
            self.finbert = pipeline(
                "text-classification",
                model="ProsusAI/finbert",
                return_all_scores=True
            )
            self.business_classifier = pipeline(
                "text-classification",
                model="yiyanghkust/finbert-tone",
                return_all_scores=True
            )
            st.success("BERT models initialized for event detection")
        except Exception as e:
            st.error(f"Error initializing BERT models: {str(e)}")
            raise

    def detect_event_type(self, text, entity):
        """Classify ``text`` as one of "Отчетность", "РЦБ", "Суд" or "Нет".

        Args:
            text: News text; it is converted with ``str`` and stripped.
            entity: Company name appended to the summary of a detected event.

        Returns:
            ``(event_type, summary)``. ``("Нет", "Empty text")`` for blank
            input, ``("Нет", "")`` when confidence is below the threshold or no
            rule matches, and ``("Нет", "Error in event detection")`` if
            anything raises (a Streamlit warning is shown in that case).
        """
        import re

        event_type = "Нет"
        summary = ""

        try:
            # Ensure text is properly formatted
            text = str(text).strip()
            if not text:
                return "Нет", "Empty text"

            # Get predictions
            finbert_scores = self.finbert(
                text,
                truncation=True,
                max_length=512
            )
            business_scores = self.business_classifier(
                text,
                truncation=True,
                max_length=512
            )

            # Get highest scoring predictions
            finbert_pred = max(finbert_scores[0], key=lambda x: x['score'])
            business_pred = max(business_scores[0], key=lambda x: x['score'])
            max_confidence = max(finbert_pred['score'], business_pred['score'])

            if max_confidence >= self._CONFIDENCE_THRESHOLD:
                # Lower-case once; fold 'ё' into 'е' so "отчёт" matches "отчет".
                lowered = text.lower().replace('ё', 'е')
                for rule_type, rule_label, pattern in self._EVENT_RULES:
                    if re.search(pattern, lowered):
                        event_type = rule_type
                        summary = f"{rule_label} (confidence: {max_confidence:.2f})"
                        break

            if event_type != "Нет":
                summary += f"\nКомпания: {entity}"

            return event_type, summary

        except Exception as e:
            st.warning(f"Event detection error: {str(e)}")
            return "Нет", "Error in event detection"
|
| 407 |
+
|
| 408 |
class TranslationSystem:
    """Text translator built on the ``Helsinki-NLP/opus-mt-ru-en`` pipeline."""

    def __init__(self):
        """Initialize translation system using Helsinki NLP model"""
        try:
            self.translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ru-en")
            st.success("Translation system initialized")
        except Exception as e:
            st.error(f"Error initializing translator: {str(e)}")
            raise

    def translate_text(self, text):
        """Translate ``text`` chunk by chunk and return the joined result.

        Args:
            text: Value to translate. Missing values (``None``/NaN) give ``""``;
                other non-string values are returned as ``str(text)``;
                whitespace-only strings are returned unchanged.

        Returns:
            The translated chunks joined by single spaces. A chunk that fails
            to translate is kept untranslated (with a Streamlit warning). If
            the whole operation fails, the stripped input is returned.
        """
        if not isinstance(text, str):
            # Check the type first: pd.isna on a list/array is not a scalar
            # and cannot be used directly in a boolean test.
            try:
                missing = bool(pd.isna(text))
            except (TypeError, ValueError):
                missing = False
            return "" if missing else str(text)

        if not text.strip():
            return text

        text = text.strip()

        try:
            # Chunk size in whitespace-separated words, not model tokens.
            max_chunk_size = 450
            chunks = self._split_into_chunks(text, max_chunk_size)
            translated_chunks = []

            for chunk in chunks:
                if not chunk.strip():
                    continue

                try:
                    result = self.translator(chunk, max_length=512)
                    if result and isinstance(result, list) and len(result) > 0:
                        translated_chunks.append(result[0].get('translation_text', chunk))
                    else:
                        translated_chunks.append(chunk)
                except Exception as e:
                    st.warning(f"Chunk translation error: {str(e)}")
                    translated_chunks.append(chunk)

                time.sleep(0.1)

            return ' '.join(translated_chunks)

        except Exception as e:
            st.warning(f"Translation error: {str(e)}")
            return text

    def _split_into_chunks(self, text, max_length):
        """Split ``text`` into chunks of at most ``max_length`` words.

        Sentences (split after ``.``, ``!`` or ``?`` followed by whitespace)
        are packed greedily and keep their punctuation. A single sentence
        longer than ``max_length`` words is cut on word boundaries.

        Returns:
            A list of non-empty chunk strings; ``[]`` for blank input.
        """
        import re

        max_length = max(1, int(max_length))
        chunks = []
        current = []
        current_length = 0

        for sentence in re.split(r'(?<=[.!?])\s+', text.strip()):
            words = sentence.split()
            if not words:
                continue

            if len(words) > max_length:
                # Flush what has been collected, then slice the oversized sentence.
                if current:
                    chunks.append(' '.join(current))
                    current = []
                    current_length = 0
                for start in range(0, len(words), max_length):
                    chunks.append(' '.join(words[start:start + max_length]))
                continue

            if current and current_length + len(words) > max_length:
                chunks.append(' '.join(current))
                current = []
                current_length = 0

            current.append(sentence.strip())
            current_length += len(words)

        if current:
            chunks.append(' '.join(current))

        return chunks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 451 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
|
| 453 |
|
| 454 |
def process_file(uploaded_file, model_choice, translation_method=None):
    """Run translation, sentiment, event detection and impact estimation over an uploaded workbook.

    Args:
        uploaded_file: Excel file object; the sheet 'Публикации' is read.
        model_choice: Passed to ``init_langchain_llm`` to build the main LLM.
        translation_method: Accepted for compatibility; not used in this function.

    Returns:
        The processed DataFrame (possibly partial if the user stopped the run),
        or ``None`` if required columns are missing or processing failed.
    """
    df = None
    try:
        # Initialize UI and control systems
        ui = ProcessingUI()
        translator = TranslationSystem()
        event_detector = EventDetectionSystem()
        control = st.session_state.control  # created by ProcessingUI if absent

        # Load and prepare data
        df = pd.read_excel(uploaded_file, sheet_name='Публикации')
        llm = init_langchain_llm(model_choice)

        # Initialize Groq for impact estimation
        groq_llm = ensure_groq_llm()
        if groq_llm is None:
            st.warning("Failed to initialize Groq LLM for impact estimation. Using fallback model.")

        # Prepare dataframe
        text_columns = ['Объект', 'Заголовок', 'Выдержки из текста']
        missing_columns = [col for col in text_columns if col not in df.columns]
        if missing_columns:
            # Report the actual problem instead of a bare KeyError message.
            st.error(f"Error: The following required columns are missing: {', '.join(missing_columns)}")
            return None
        for col in text_columns:
            df[col] = df[col].fillna('').astype(str).apply(lambda x: x.strip())

        # Initialize required columns ('Reasoning' is written per row below,
        # so it is created up front like the others).
        df['Translated'] = ''
        df['Sentiment'] = ''
        df['Impact'] = ''
        df['Reasoning'] = ''
        df['Event_Type'] = ''
        df['Event_Summary'] = ''

        # Deduplication
        original_count = len(df)
        df = df.groupby('Объект', group_keys=False).apply(
            lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
        ).reset_index(drop=True)
        st.write(f"Removed {original_count - len(df)} duplicates.")

        # Process rows
        total_rows = len(df)
        processed_rows = 0

        for idx, row in df.iterrows():
            # Check for stop/pause
            if control.is_stopped():
                st.warning("Processing stopped by user")
                break

            # Blocks while paused. stop() also releases the wait, so the stop
            # flag is re-checked afterwards. The row is NOT skipped on resume.
            control.wait_if_paused()
            if control.is_stopped():
                st.warning("Processing stopped by user")
                break

            try:
                # Translation
                translated_text = translator.translate_text(row['Выдержки из текста'])
                df.at[idx, 'Translated'] = translated_text

                # Sentiment analysis
                sentiment = analyze_sentiment(translated_text)
                df.at[idx, 'Sentiment'] = sentiment

                # Event detection using BERT
                # NOTE(review): the detector receives the translated text, so
                # its keyword rules must cover the translation language for
                # events to fire — confirm against EventDetectionSystem.
                event_type, event_summary = event_detector.detect_event_type(
                    translated_text,
                    row['Объект']
                )
                df.at[idx, 'Event_Type'] = event_type
                df.at[idx, 'Event_Summary'] = event_summary

                # Show events in real-time
                if event_type != "Нет":
                    ui.show_event(
                        row['Объект'],
                        event_type,
                        row['Заголовок']
                    )

                # Handle negative sentiment
                if sentiment == "Negative":
                    try:
                        impact, reasoning = estimate_impact(
                            groq_llm if groq_llm is not None else llm,
                            translated_text,
                            row['Объект']
                        )
                    except Exception as e:
                        impact = "Неопределенный эффект"
                        reasoning = "Error in impact estimation"
                        if 'rate limit' in str(e).lower():
                            st.warning("Rate limit reached. Using fallback values.")
                        else:
                            # Previously swallowed silently; surface the cause.
                            st.warning(f"Impact estimation error: {str(e)}")

                    df.at[idx, 'Impact'] = impact
                    df.at[idx, 'Reasoning'] = reasoning

                    # Show negative alert in real-time
                    ui.show_negative(
                        row['Объект'],
                        row['Заголовок'],
                        reasoning,
                        impact
                    )

                # Update progress
                processed_rows += 1
                ui.update_progress(processed_rows, total_rows)

            except Exception as e:
                st.warning(f"Error processing row {idx + 1}: {str(e)}")
                continue

            time.sleep(0.1)

        # Handle stopped processing
        if control.is_stopped() and len(df) > 0:
            st.warning("Processing was stopped. Showing partial results.")
            # Offer the download directly: a download_button nested under
            # st.button disappears on the rerun that the outer click triggers.
            output = create_output_file(df, uploaded_file, llm)
            st.download_button(
                label="📊 Download Partial Results",
                data=output,
                file_name="partial_analysis.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )

        return df

    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None
|
| 583 |
|
| 584 |
def translate_reasoning_to_russian(llm, text):
|
|
|
|
| 672 |
|
| 673 |
|
| 674 |
def analyze_sentiment(text):
    """Return the majority sentiment of the three classifiers for ``text``.

    ``finbert``, ``roberta`` and ``finbert_tone`` are each called on the text
    and their first result is mapped with ``get_mapped_sentiment``. A label
    chosen by at least two of them is returned; otherwise "Neutral". Any
    exception produces a Streamlit warning and "Neutral".
    """
    def _vote(model):
        # First result of the classifier, mapped to the shared label set.
        return get_mapped_sentiment(model(text, truncation=True, max_length=512)[0])

    try:
        votes = []
        votes.append(_vote(finbert))
        votes.append(_vote(roberta))
        votes.append(_vote(finbert_tone))

        # With three votes, at most one label can appear twice or more.
        for label in votes:
            if votes.count(label) >= 2:
                return label

        # Default to Neutral if no agreement
        return "Neutral"

    except Exception as e:
        st.warning(f"Sentiment analysis error: {str(e)}")
        return "Neutral"
|
| 701 |
+
|
| 702 |
|
| 703 |
def fuzzy_deduplicate(df, column, threshold=50):
|
| 704 |
seen_texts = []
|
|
|
|
| 937 |
wb.save(output)
|
| 938 |
output.seek(0)
|
| 939 |
return output
|
| 940 |
+
|
| 941 |
def main():
|
| 942 |
+
st.set_page_config(layout="wide")
|
| 943 |
+
|
| 944 |
with st.sidebar:
|
| 945 |
+
st.title("::: AI-анализ мониторинга новостей (v.3.54):::")
|
| 946 |
+
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
|
|
|
|
|
|
| 947 |
|
| 948 |
model_choice = st.radio(
|
| 949 |
"Выберите модель для анализа:",
|
|
|
|
| 951 |
key="model_selector",
|
| 952 |
help="Выберите модель для анализа новостей"
|
| 953 |
)
|
| 954 |
+
|
| 955 |
+
uploaded_file = st.file_uploader(
|
| 956 |
+
"Выбирайте Excel-файл",
|
| 957 |
+
type="xlsx",
|
| 958 |
+
key="file_uploader"
|
| 959 |
+
)
|
| 960 |
+
|
| 961 |
st.markdown(
|
| 962 |
+
"""
|
| 963 |
+
Использованы технологии:
|
| 964 |
+
- Анализ естественного языка с помощью предтренированных нейросетей **BERT**
|
| 965 |
+
- Дополнительная обработка при помощи больших языковых моделей (**LLM**)
|
| 966 |
+
- Фреймворк **LangChain** для оркестрации
|
| 967 |
+
""",
|
| 968 |
+
unsafe_allow_html=True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 969 |
)
|
| 970 |
|
| 971 |
+
# Main content area
|
| 972 |
st.title("Анализ мониторинга новостей")
|
| 973 |
|
| 974 |
+
# Initialize session state
|
| 975 |
if 'processed_df' not in st.session_state:
|
| 976 |
st.session_state.processed_df = None
|
| 977 |
+
|
| 978 |
+
# Create display areas
|
| 979 |
+
col1, col2 = st.columns([2, 1])
|
| 980 |
|
| 981 |
+
with col1:
|
| 982 |
+
# Area for real-time updates
|
| 983 |
+
st.subheader("Live Updates")
|
| 984 |
+
st.markdown("""
|
| 985 |
+
<style>
|
| 986 |
+
.stProgress .st-bo {
|
| 987 |
+
background-color: #f0f2f6;
|
| 988 |
+
}
|
| 989 |
+
.negative-alert {
|
| 990 |
+
background-color: #ffebee;
|
| 991 |
+
border-left: 5px solid #f44336;
|
| 992 |
+
padding: 10px;
|
| 993 |
+
margin: 5px 0;
|
| 994 |
+
}
|
| 995 |
+
.event-alert {
|
| 996 |
+
background-color: #e3f2fd;
|
| 997 |
+
border-left: 5px solid #2196f3;
|
| 998 |
+
padding: 10px;
|
| 999 |
+
margin: 5px 0;
|
| 1000 |
+
}
|
| 1001 |
+
</style>
|
| 1002 |
+
""", unsafe_allow_html=True)
|
| 1003 |
+
|
| 1004 |
+
with col2:
|
| 1005 |
+
# Area for statistics
|
| 1006 |
+
st.subheader("Statistics")
|
| 1007 |
+
if st.session_state.processed_df is not None:
|
| 1008 |
+
st.metric("Total Items", len(st.session_state.processed_df))
|
| 1009 |
+
st.metric("Negative Items",
|
| 1010 |
+
len(st.session_state.processed_df[
|
| 1011 |
+
st.session_state.processed_df['Sentiment'] == 'Negative'
|
| 1012 |
+
])
|
| 1013 |
+
)
|
| 1014 |
+
st.metric("Events Detected",
|
| 1015 |
+
len(st.session_state.processed_df[
|
| 1016 |
+
st.session_state.processed_df['Event_Type'] != 'Нет'
|
| 1017 |
+
])
|
| 1018 |
+
)
|
| 1019 |
|
| 1020 |
if uploaded_file is not None and st.session_state.processed_df is None:
|
| 1021 |
+
start_time = time.time()
|
| 1022 |
+
|
| 1023 |
try:
|
| 1024 |
st.session_state.processed_df = process_file(
|
| 1025 |
uploaded_file,
|
|
|
|
| 1028 |
)
|
| 1029 |
|
| 1030 |
if st.session_state.processed_df is not None:
|
| 1031 |
+
end_time = time.time()
|
| 1032 |
+
elapsed_time = format_elapsed_time(end_time - start_time)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1033 |
|
| 1034 |
+
# Show results
|
| 1035 |
+
st.subheader("Results Summary")
|
| 1036 |
+
|
| 1037 |
+
# Display statistics
|
| 1038 |
+
stats_cols = st.columns(4)
|
| 1039 |
+
with stats_cols[0]:
|
| 1040 |
+
st.metric("Total Processed", len(st.session_state.processed_df))
|
| 1041 |
+
with stats_cols[1]:
|
| 1042 |
+
st.metric("Negative Items",
|
| 1043 |
+
len(st.session_state.processed_df[
|
| 1044 |
+
st.session_state.processed_df['Sentiment'] == 'Negative'
|
| 1045 |
+
])
|
| 1046 |
+
)
|
| 1047 |
+
with stats_cols[2]:
|
| 1048 |
+
st.metric("Events Detected",
|
| 1049 |
+
len(st.session_state.processed_df[
|
| 1050 |
+
st.session_state.processed_df['Event_Type'] != 'Нет'
|
| 1051 |
+
])
|
| 1052 |
+
)
|
| 1053 |
+
with stats_cols[3]:
|
| 1054 |
+
st.metric("Processing Time", elapsed_time)
|
| 1055 |
|
| 1056 |
+
# Show data previews
|
| 1057 |
+
with st.expander("📊 Data Preview", expanded=True):
|
| 1058 |
+
preview_cols = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type']
|
| 1059 |
+
st.dataframe(
|
| 1060 |
+
st.session_state.processed_df[preview_cols],
|
| 1061 |
+
use_container_width=True
|
| 1062 |
+
)
|
| 1063 |
+
|
| 1064 |
+
# Create downloadable report
|
| 1065 |
+
output = create_output_file(
|
| 1066 |
+
st.session_state.processed_df,
|
| 1067 |
+
uploaded_file,
|
| 1068 |
+
init_langchain_llm(model_choice)
|
| 1069 |
+
)
|
| 1070 |
+
|
| 1071 |
+
st.download_button(
|
| 1072 |
+
label="📥 Download Full Report",
|
| 1073 |
+
data=output,
|
| 1074 |
+
file_name="analysis_report.xlsx",
|
| 1075 |
+
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
| 1076 |
+
key='download_button'
|
| 1077 |
+
)
|
| 1078 |
|
| 1079 |
except Exception as e:
|
| 1080 |
+
st.error(f"Error processing file: {str(e)}")
|
| 1081 |
st.session_state.processed_df = None
|
| 1082 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1083 |
|
| 1084 |
if __name__ == "__main__":
|
| 1085 |
main()
|
requirements.txt
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
streamlit
|
| 2 |
pandas
|
| 3 |
-
vaderSentiment
|
| 4 |
transformers>=4.30.0
|
| 5 |
torch
|
| 6 |
tqdm
|
|
@@ -20,5 +19,4 @@ pdfkit
|
|
| 20 |
Jinja2==3.1.2
|
| 21 |
langchain_openai
|
| 22 |
optimum
|
| 23 |
-
|
| 24 |
-
deep_translator
|
|
|
|
| 1 |
streamlit
|
| 2 |
pandas
|
|
|
|
| 3 |
transformers>=4.30.0
|
| 4 |
torch
|
| 5 |
tqdm
|
|
|
|
| 19 |
Jinja2==3.1.2
|
| 20 |
langchain_openai
|
| 21 |
optimum
|
| 22 |
+
sentencepiece
|
|
|