Spaces:
Running
Commit ·
10f44f6
1
Parent(s): 9da9717
4.16 attempt at mend stop
Browse files
app.py
CHANGED
|
@@ -992,7 +992,14 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 992 |
# Create processed_rows_df with all columns from original df and required columns
|
| 993 |
all_columns = list(set(list(df.columns) + list(required_columns.keys())))
|
| 994 |
processed_rows_df = pd.DataFrame(columns=all_columns)
|
| 995 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 996 |
# Process rows
|
| 997 |
total_rows = len(df)
|
| 998 |
processed_rows = 0
|
|
@@ -1017,7 +1024,7 @@ def process_file(uploaded_file, model_choice, translation_method=None):
|
|
| 1017 |
result_df['Сводка'] = svodka_df.to_dict('records')
|
| 1018 |
result_df['Публикации'] = processed_rows_df.to_dict('records')
|
| 1019 |
|
| 1020 |
-
output = create_output_file(
|
| 1021 |
if output is not None:
|
| 1022 |
st.download_button(
|
| 1023 |
label=f"📊 Скачать результат ({processed_rows} из {total_rows} строк)",
|
|
@@ -1525,7 +1532,7 @@ def main():
|
|
| 1525 |
st.set_page_config(layout="wide")
|
| 1526 |
|
| 1527 |
with st.sidebar:
|
| 1528 |
-
st.title("::: AI-анализ мониторинга новостей (v.4.
|
| 1529 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
| 1530 |
|
| 1531 |
model_choice = st.radio(
|
|
|
|
| 992 |
# Create processed_rows_df with all columns from original df and required columns
|
| 993 |
all_columns = list(set(list(df.columns) + list(required_columns.keys())))
|
| 994 |
processed_rows_df = pd.DataFrame(columns=all_columns)
|
| 995 |
+
|
| 996 |
+
# Deduplication
|
| 997 |
+
original_count = len(df)
|
| 998 |
+
df = df.groupby('Объект', group_keys=False).apply(
|
| 999 |
+
lambda x: fuzzy_deduplicate(x, 'Выдержки из текста', 55)
|
| 1000 |
+
).reset_index(drop=True)
|
| 1001 |
+
st.write(f"Из {original_count} сообщений удалено {original_count - len(df)} дубликатов.")
|
| 1002 |
+
|
| 1003 |
# Process rows
|
| 1004 |
total_rows = len(df)
|
| 1005 |
processed_rows = 0
|
|
|
|
| 1024 |
result_df['Сводка'] = svodka_df.to_dict('records')
|
| 1025 |
result_df['Публикации'] = processed_rows_df.to_dict('records')
|
| 1026 |
|
| 1027 |
+
output = create_output_file(processed_rows_df, uploaded_file)
|
| 1028 |
if output is not None:
|
| 1029 |
st.download_button(
|
| 1030 |
label=f"📊 Скачать результат ({processed_rows} из {total_rows} строк)",
|
|
|
|
| 1532 |
st.set_page_config(layout="wide")
|
| 1533 |
|
| 1534 |
with st.sidebar:
|
| 1535 |
+
st.title("::: AI-анализ мониторинга новостей (v.4.16):::")
|
| 1536 |
st.subheader("по материалам СКАН-ИНТЕРФАКС")
|
| 1537 |
|
| 1538 |
model_choice = st.radio(
|