Spaces:
Sleeping
Sleeping
Commit ·
59bee7f
1
Parent(s): 78ed556
3.56
Browse files
app.py
CHANGED
|
@@ -19,7 +19,6 @@ import time
|
|
| 19 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 20 |
from typing import Optional
|
| 21 |
import torch
|
| 22 |
-
|
| 23 |
from transformers import (
|
| 24 |
pipeline,
|
| 25 |
AutoModelForSeq2SeqLM,
|
|
@@ -293,14 +292,14 @@ class ProcessingUI:
|
|
| 293 |
# Create control buttons
|
| 294 |
col1, col2 = st.columns(2)
|
| 295 |
with col1:
|
| 296 |
-
if st.button("⏸️
|
| 297 |
if st.session_state.control.is_paused():
|
| 298 |
st.session_state.control.resume()
|
| 299 |
else:
|
| 300 |
st.session_state.control.pause()
|
| 301 |
|
| 302 |
with col2:
|
| 303 |
-
if st.button("⏹️
|
| 304 |
st.session_state.control.stop()
|
| 305 |
|
| 306 |
self.progress_bar = st.progress(0)
|
|
@@ -309,7 +308,7 @@ class ProcessingUI:
|
|
| 309 |
def update_progress(self, current, total):
|
| 310 |
progress = current / total
|
| 311 |
self.progress_bar.progress(progress)
|
| 312 |
-
self.status.text(f"
|
| 313 |
|
| 314 |
def show_negative(self, entity, headline, analysis, impact=None):
|
| 315 |
with st.session_state.negative_container:
|
|
@@ -447,6 +446,29 @@ class TranslationSystem:
|
|
| 447 |
st.warning(f"Translation error: {str(e)}")
|
| 448 |
return text
|
| 449 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 450 |
|
| 451 |
|
| 452 |
def process_file(uploaded_file, model_choice, translation_method=None):
|
|
@@ -484,7 +506,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 484 |
df = df.groupby('Объект', group_keys=False).apply(
|
| 485 |
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
|
| 486 |
).reset_index(drop=True)
|
| 487 |
-
st.write(f"
|
| 488 |
|
| 489 |
# Process rows
|
| 490 |
total_rows = len(df)
|
|
@@ -493,12 +515,12 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 493 |
for idx, row in df.iterrows():
|
| 494 |
# Check for stop/pause
|
| 495 |
if st.session_state.control.is_stopped():
|
| 496 |
-
st.warning("
|
| 497 |
break
|
| 498 |
|
| 499 |
st.session_state.control.wait_if_paused()
|
| 500 |
if st.session_state.control.is_paused():
|
| 501 |
-
st.info("
|
| 502 |
continue
|
| 503 |
|
| 504 |
try:
|
|
@@ -538,7 +560,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 538 |
impact = "Неопределенный эффект"
|
| 539 |
reasoning = "Error in impact estimation"
|
| 540 |
if 'rate limit' in str(e).lower():
|
| 541 |
-
st.warning("
|
| 542 |
|
| 543 |
df.at[idx, 'Impact'] = impact
|
| 544 |
df.at[idx, 'Reasoning'] = reasoning
|
|
@@ -556,18 +578,18 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 556 |
ui.update_progress(processed_rows, total_rows)
|
| 557 |
|
| 558 |
except Exception as e:
|
| 559 |
-
st.warning(f"
|
| 560 |
continue
|
| 561 |
|
| 562 |
time.sleep(0.1)
|
| 563 |
|
| 564 |
# Handle stopped processing
|
| 565 |
if st.session_state.control.is_stopped() and len(df) > 0:
|
| 566 |
-
st.warning("
|
| 567 |
-
if st.button("
|
| 568 |
output = create_output_file(df, uploaded_file, llm)
|
| 569 |
st.download_button(
|
| 570 |
-
label="📊
|
| 571 |
data=output,
|
| 572 |
file_name="partial_analysis.xlsx",
|
| 573 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
@@ -576,7 +598,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 576 |
return df
|
| 577 |
|
| 578 |
except Exception as e:
|
| 579 |
-
st.error(f"
|
| 580 |
return None
|
| 581 |
|
| 582 |
def translate_reasoning_to_russian(llm, text):
|
|
@@ -940,12 +962,12 @@ def main():
|
|
| 940 |
st.set_page_config(layout="wide")
|
| 941 |
|
| 942 |
with st.sidebar:
|
| 943 |
-
st.title("::: AI-анализ мониторинга новостей (v.3.
|
| 944 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
| 945 |
|
| 946 |
model_choice = st.radio(
|
| 947 |
"Выберите модель для анализа:",
|
| 948 |
-
["Qwen2.5-Coder", "Groq (llama-3.1-70b)", "ChatGPT-4-mini"
|
| 949 |
key="model_selector",
|
| 950 |
help="Выберите модель для анализа новостей"
|
| 951 |
)
|
|
@@ -978,7 +1000,7 @@ def main():
|
|
| 978 |
|
| 979 |
with col1:
|
| 980 |
# Area for real-time updates
|
| 981 |
-
st.subheader("
|
| 982 |
st.markdown("""
|
| 983 |
<style>
|
| 984 |
.stProgress .st-bo {
|
|
@@ -1001,15 +1023,15 @@ def main():
|
|
| 1001 |
|
| 1002 |
with col2:
|
| 1003 |
# Area for statistics
|
| 1004 |
-
st.subheader("
|
| 1005 |
if st.session_state.processed_df is not None:
|
| 1006 |
-
st.metric("
|
| 1007 |
-
st.metric("
|
| 1008 |
len(st.session_state.processed_df[
|
| 1009 |
st.session_state.processed_df['Sentiment'] == 'Negative'
|
| 1010 |
])
|
| 1011 |
)
|
| 1012 |
-
st.metric("
|
| 1013 |
len(st.session_state.processed_df[
|
| 1014 |
st.session_state.processed_df['Event_Type'] != 'Нет'
|
| 1015 |
])
|
|
@@ -1030,29 +1052,29 @@ def main():
|
|
| 1030 |
elapsed_time = format_elapsed_time(end_time - start_time)
|
| 1031 |
|
| 1032 |
# Show results
|
| 1033 |
-
st.subheader("
|
| 1034 |
|
| 1035 |
# Display statistics
|
| 1036 |
stats_cols = st.columns(4)
|
| 1037 |
with stats_cols[0]:
|
| 1038 |
-
st.metric("
|
| 1039 |
with stats_cols[1]:
|
| 1040 |
-
st.metric("
|
| 1041 |
len(st.session_state.processed_df[
|
| 1042 |
st.session_state.processed_df['Sentiment'] == 'Negative'
|
| 1043 |
])
|
| 1044 |
)
|
| 1045 |
with stats_cols[2]:
|
| 1046 |
-
st.metric("
|
| 1047 |
len(st.session_state.processed_df[
|
| 1048 |
st.session_state.processed_df['Event_Type'] != 'Нет'
|
| 1049 |
])
|
| 1050 |
)
|
| 1051 |
with stats_cols[3]:
|
| 1052 |
-
st.metric("
|
| 1053 |
|
| 1054 |
# Show data previews
|
| 1055 |
-
with st.expander("📊
|
| 1056 |
preview_cols = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type']
|
| 1057 |
st.dataframe(
|
| 1058 |
st.session_state.processed_df[preview_cols],
|
|
@@ -1067,15 +1089,15 @@ def main():
|
|
| 1067 |
)
|
| 1068 |
|
| 1069 |
st.download_button(
|
| 1070 |
-
label="📥
|
| 1071 |
data=output,
|
| 1072 |
-
file_name="
|
| 1073 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
| 1074 |
key='download_button'
|
| 1075 |
)
|
| 1076 |
|
| 1077 |
except Exception as e:
|
| 1078 |
-
st.error(f"
|
| 1079 |
st.session_state.processed_df = None
|
| 1080 |
|
| 1081 |
|
|
|
|
| 19 |
from tenacity import retry, stop_after_attempt, wait_exponential
|
| 20 |
from typing import Optional
|
| 21 |
import torch
|
|
|
|
| 22 |
from transformers import (
|
| 23 |
pipeline,
|
| 24 |
AutoModelForSeq2SeqLM,
|
|
|
|
| 292 |
# Create control buttons
|
| 293 |
col1, col2 = st.columns(2)
|
| 294 |
with col1:
|
| 295 |
+
if st.button("⏸️ Пауза/Возобновить" if not st.session_state.control.is_paused() else "▶️ Возобновить", key="pause_button"):
|
| 296 |
if st.session_state.control.is_paused():
|
| 297 |
st.session_state.control.resume()
|
| 298 |
else:
|
| 299 |
st.session_state.control.pause()
|
| 300 |
|
| 301 |
with col2:
|
| 302 |
+
if st.button("⏹️ Стоп и всё", key="stop_button"):
|
| 303 |
st.session_state.control.stop()
|
| 304 |
|
| 305 |
self.progress_bar = st.progress(0)
|
|
|
|
| 308 |
def update_progress(self, current, total):
    """Refresh the progress bar and the status line.

    Args:
        current: Number of items handled so far.
        total: Number of items to handle overall.

    The fraction ``current / total`` is sent to ``self.progress_bar`` and a
    status message containing both counters is sent to ``self.status``.
    """
    if total > 0:
        # Clamp so that an over- or under-shooting counter cannot hand the
        # progress widget a value outside the 0.0-1.0 range.
        progress = min(max(current / total, 0.0), 1.0)
    else:
        # Nothing to process: avoid ZeroDivisionError and show an empty bar.
        progress = 0.0
    self.progress_bar.progress(progress)
    self.status.text(f"Обрабатываем {current} из {total} сообщений...")
|
| 312 |
|
| 313 |
def show_negative(self, entity, headline, analysis, impact=None):
|
| 314 |
with st.session_state.negative_container:
|
|
|
|
| 446 |
st.warning(f"Translation error: {str(e)}")
|
| 447 |
return text
|
| 448 |
|
| 449 |
+
def _split_into_chunks(self, text, max_length):
|
| 450 |
+
sentences = []
|
| 451 |
+
for s in text.replace('!', '.').replace('?', '.').split('.'):
|
| 452 |
+
s = s.strip()
|
| 453 |
+
if s:
|
| 454 |
+
if len(s) > max_length:
|
| 455 |
+
# Split long sentences into smaller chunks
|
| 456 |
+
words = s.split()
|
| 457 |
+
current_chunk = []
|
| 458 |
+
current_length = 0
|
| 459 |
+
for word in words:
|
| 460 |
+
if current_length + len(word) > max_length:
|
| 461 |
+
sentences.append(' '.join(current_chunk))
|
| 462 |
+
current_chunk = [word]
|
| 463 |
+
current_length = len(word)
|
| 464 |
+
else:
|
| 465 |
+
current_chunk.append(word)
|
| 466 |
+
current_length += len(word) + 1
|
| 467 |
+
if current_chunk:
|
| 468 |
+
sentences.append(' '.join(current_chunk))
|
| 469 |
+
else:
|
| 470 |
+
sentences.append(s)
|
| 471 |
+
|
| 472 |
|
| 473 |
|
| 474 |
def process_file(uploaded_file, model_choice, translation_method=None):
|
|
|
|
| 506 |
df = df.groupby('Объект', group_keys=False).apply(
|
| 507 |
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 65)
|
| 508 |
).reset_index(drop=True)
|
| 509 |
+
st.write(f"Из {original_count} сообщений удалено {original_count - len(df)} дубликатов.")
|
| 510 |
|
| 511 |
# Process rows
|
| 512 |
total_rows = len(df)
|
|
|
|
| 515 |
for idx, row in df.iterrows():
|
| 516 |
# Check for stop/pause
|
| 517 |
if st.session_state.control.is_stopped():
|
| 518 |
+
st.warning("Обработку остановили")
|
| 519 |
break
|
| 520 |
|
| 521 |
st.session_state.control.wait_if_paused()
|
| 522 |
if st.session_state.control.is_paused():
|
| 523 |
+
st.info("Обработка на паузе. Можно возобновить.")
|
| 524 |
continue
|
| 525 |
|
| 526 |
try:
|
|
|
|
| 560 |
impact = "Неопределенный эффект"
|
| 561 |
reasoning = "Error in impact estimation"
|
| 562 |
if 'rate limit' in str(e).lower():
|
| 563 |
+
st.warning("Лимит запросов исчерпался. Иду на fallback.")
|
| 564 |
|
| 565 |
df.at[idx, 'Impact'] = impact
|
| 566 |
df.at[idx, 'Reasoning'] = reasoning
|
|
|
|
| 578 |
ui.update_progress(processed_rows, total_rows)
|
| 579 |
|
| 580 |
except Exception as e:
|
| 581 |
+
st.warning(f"Ошибка в обработке ряда {idx + 1}: {str(e)}")
|
| 582 |
continue
|
| 583 |
|
| 584 |
time.sleep(0.1)
|
| 585 |
|
| 586 |
# Handle stopped processing
|
| 587 |
if st.session_state.control.is_stopped() and len(df) > 0:
|
| 588 |
+
st.warning("Обработку остановили. Показываю частичные результаты.")
|
| 589 |
+
if st.button("Скачать частичный результат"):
|
| 590 |
output = create_output_file(df, uploaded_file, llm)
|
| 591 |
st.download_button(
|
| 592 |
+
label="📊 Скачать частичный результат",
|
| 593 |
data=output,
|
| 594 |
file_name="partial_analysis.xlsx",
|
| 595 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
|
|
| 598 |
return df
|
| 599 |
|
| 600 |
except Exception as e:
|
| 601 |
+
st.error(f"Ошибка в обработке файла: {str(e)}")
|
| 602 |
return None
|
| 603 |
|
| 604 |
def translate_reasoning_to_russian(llm, text):
|
|
|
|
| 962 |
st.set_page_config(layout="wide")
|
| 963 |
|
| 964 |
with st.sidebar:
|
| 965 |
+
st.title("::: AI-анализ мониторинга новостей (v.3.56):::")
|
| 966 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
| 967 |
|
| 968 |
model_choice = st.radio(
|
| 969 |
"Выберите модель для анализа:",
|
| 970 |
+
["Local-MT5", "Qwen2.5-Coder", "Groq (llama-3.1-70b)", "ChatGPT-4-mini"],
|
| 971 |
key="model_selector",
|
| 972 |
help="Выберите модель для анализа новостей"
|
| 973 |
)
|
|
|
|
| 1000 |
|
| 1001 |
with col1:
|
| 1002 |
# Area for real-time updates
|
| 1003 |
+
st.subheader("Что найдено, сообщаю:")
|
| 1004 |
st.markdown("""
|
| 1005 |
<style>
|
| 1006 |
.stProgress .st-bo {
|
|
|
|
| 1023 |
|
| 1024 |
with col2:
|
| 1025 |
# Area for statistics
|
| 1026 |
+
st.subheader("Статистика")
|
| 1027 |
if st.session_state.processed_df is not None:
|
| 1028 |
+
st.metric("Всего статей", len(st.session_state.processed_df))
|
| 1029 |
+
st.metric("Из них негативных",
|
| 1030 |
len(st.session_state.processed_df[
|
| 1031 |
st.session_state.processed_df['Sentiment'] == 'Negative'
|
| 1032 |
])
|
| 1033 |
)
|
| 1034 |
+
st.metric("Событий обнаружено",
|
| 1035 |
len(st.session_state.processed_df[
|
| 1036 |
st.session_state.processed_df['Event_Type'] != 'Нет'
|
| 1037 |
])
|
|
|
|
| 1052 |
elapsed_time = format_elapsed_time(end_time - start_time)
|
| 1053 |
|
| 1054 |
# Show results
|
| 1055 |
+
st.subheader("Итого по результатам")
|
| 1056 |
|
| 1057 |
# Display statistics
|
| 1058 |
stats_cols = st.columns(4)
|
| 1059 |
with stats_cols[0]:
|
| 1060 |
+
st.metric("Всего обработано", len(st.session_state.processed_df))
|
| 1061 |
with stats_cols[1]:
|
| 1062 |
+
st.metric("Негативных",
|
| 1063 |
len(st.session_state.processed_df[
|
| 1064 |
st.session_state.processed_df['Sentiment'] == 'Negative'
|
| 1065 |
])
|
| 1066 |
)
|
| 1067 |
with stats_cols[2]:
|
| 1068 |
+
st.metric("Событий обнаружено",
|
| 1069 |
len(st.session_state.processed_df[
|
| 1070 |
st.session_state.processed_df['Event_Type'] != 'Нет'
|
| 1071 |
])
|
| 1072 |
)
|
| 1073 |
with stats_cols[3]:
|
| 1074 |
+
st.metric("Время обработки составило", elapsed_time)
|
| 1075 |
|
| 1076 |
# Show data previews
|
| 1077 |
+
with st.expander("📊 Предпросмотр данных", expanded=True):
|
| 1078 |
preview_cols = ['Объект', 'Заголовок', 'Sentiment', 'Event_Type']
|
| 1079 |
st.dataframe(
|
| 1080 |
st.session_state.processed_df[preview_cols],
|
|
|
|
| 1089 |
)
|
| 1090 |
|
| 1091 |
st.download_button(
|
| 1092 |
+
label="📥 Полный отчет - загрузить",
|
| 1093 |
data=output,
|
| 1094 |
+
file_name="результаты_анализа.xlsx",
|
| 1095 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
| 1096 |
key='download_button'
|
| 1097 |
)
|
| 1098 |
|
| 1099 |
except Exception as e:
|
| 1100 |
+
st.error(f"Ошибочка в обработке файла: {str(e)}")
|
| 1101 |
st.session_state.processed_df = None
|
| 1102 |
|
| 1103 |
|