Spaces:
Running
Running
Commit ·
bc222e3
1
Parent(s): aebf0a2
progress more 72
Browse files
app.py
CHANGED
|
@@ -260,9 +260,23 @@ def create_output_file_with_llm(df, uploaded_file, analysis_df):
|
|
| 260 |
def create_analysis_data(df):
|
| 261 |
analysis_data = []
|
| 262 |
for _, row in df.iterrows():
|
| 263 |
-
if
|
| 264 |
-
analysis_data.append([
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
# Function for lemmatizing Russian text
|
| 268 |
def lemmatize_text(text):
|
|
@@ -441,42 +455,52 @@ def process_file(uploaded_file):
|
|
| 441 |
|
| 442 |
return df
|
| 443 |
|
|
|
|
| 444 |
def create_output_file(df, uploaded_file):
|
| 445 |
wb = load_workbook("sample_file.xlsx")
|
| 446 |
|
| 447 |
-
# Update
|
| 448 |
summary_df = pd.DataFrame({
|
| 449 |
'Объект': df['Объект'].unique(),
|
| 450 |
'Всего новостей': df.groupby('Объект').size(),
|
| 451 |
-
'Негативные': df[df['Sentiment'] == 'Negative'].groupby('Объект').size(),
|
| 452 |
-
'Позитивные': df[df['Sentiment'] == 'Positive'].groupby('Объект').size(),
|
| 453 |
-
'Преобладающий эффект': df.groupby('Объект')['Impact'].agg(
|
|
|
|
|
|
|
| 454 |
})
|
| 455 |
-
|
|
|
|
|
|
|
| 456 |
|
| 457 |
# Write 'Сводка' sheet
|
| 458 |
ws = wb['Сводка']
|
| 459 |
-
for r_idx, row in enumerate(dataframe_to_rows(summary_df, index=False, header=
|
| 460 |
for c_idx, value in enumerate(row, start=5):
|
| 461 |
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 462 |
|
| 463 |
-
#
|
| 464 |
-
|
| 465 |
significant_data = []
|
| 466 |
for _, row in df.iterrows():
|
| 467 |
-
if
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
-
# Write 'Значимые' sheet
|
| 472 |
ws = wb['Значимые']
|
| 473 |
for r_idx, row in enumerate(significant_data, start=3):
|
| 474 |
for c_idx, value in enumerate(row, start=3):
|
| 475 |
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 476 |
|
| 477 |
-
#
|
|
|
|
| 478 |
ws = wb['Анализ']
|
| 479 |
-
for r_idx, row in enumerate(dataframe_to_rows(analysis_df, index=False, header=
|
| 480 |
for c_idx, value in enumerate(row, start=5):
|
| 481 |
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 482 |
|
|
@@ -495,13 +519,13 @@ def create_output_file(df, uploaded_file):
|
|
| 495 |
for c_idx, value in enumerate(row, start=1):
|
| 496 |
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 497 |
|
| 498 |
-
# Save the workbook to a BytesIO object
|
| 499 |
output = io.BytesIO()
|
| 500 |
wb.save(output)
|
| 501 |
output.seek(0)
|
| 502 |
-
|
| 503 |
return output
|
| 504 |
|
|
|
|
|
|
|
| 505 |
def generate_sentiment_visualization(df):
|
| 506 |
# Filter for negative sentiments
|
| 507 |
negative_df = df[df['Sentiment'] == 'Negative']
|
|
@@ -526,7 +550,26 @@ def generate_sentiment_visualization(df):
|
|
| 526 |
|
| 527 |
|
| 528 |
def main():
|
| 529 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
|
| 531 |
# Initialize session state
|
| 532 |
if 'processed_df' not in st.session_state:
|
|
|
|
| 260 |
def create_analysis_data(df):
    """Collect the negative-sentiment rows of *df* into an analysis table.

    Every row whose 'Sentiment' value equals 'Negative' contributes one
    output record; all other rows are ignored. The returned frame is the
    one that create_output_file writes to the 'Анализ' worksheet.

    Args:
        df: DataFrame providing the columns 'Sentiment', 'Объект',
            'Заголовок', 'Impact', 'Reasoning' and 'Выдержки из текста'.

    Returns:
        A new pandas DataFrame with the columns 'Объект', 'Заголовок',
        'Признак', 'Оценка влияния', 'Обоснование' and 'Текст сообщения'.
        'Признак' always holds the constant 'РИСК УБЫТКА'. The frame has
        no rows when *df* contains no negative entries.
    """
    output_columns = [
        'Объект',
        'Заголовок',
        'Признак',
        'Оценка влияния',
        'Обоснование',
        'Текст сообщения',
    ]
    negative_records = [
        [
            record['Объект'],
            record['Заголовок'],
            'РИСК УБЫТКА',
            # Impact and Reasoning are carried over as-is (the original
            # comments describe them as the LLM's assessment and reasoning).
            record['Impact'],
            record['Reasoning'],
            record['Выдержки из текста'],
        ]
        for _, record in df.iterrows()
        if record['Sentiment'] == 'Negative'
    ]
    return pd.DataFrame(negative_records, columns=output_columns)
|
| 280 |
|
| 281 |
# Function for lemmatizing Russian text
|
| 282 |
def lemmatize_text(text):
|
|
|
|
| 455 |
|
| 456 |
return df
|
| 457 |
|
| 458 |
+
|
| 459 |
def create_output_file(df, uploaded_file):
|
| 460 |
wb = load_workbook("sample_file.xlsx")
|
| 461 |
|
| 462 |
+
# Update 'Сводка' sheet
|
| 463 |
summary_df = pd.DataFrame({
|
| 464 |
'Объект': df['Объект'].unique(),
|
| 465 |
'Всего новостей': df.groupby('Объект').size(),
|
| 466 |
+
'Негативные': df[df['Sentiment'] == 'Negative'].groupby('Объект').size().fillna(0).astype(int),
|
| 467 |
+
'Позитивные': df[df['Sentiment'] == 'Positive'].groupby('Объект').size().fillna(0).astype(int),
|
| 468 |
+
'Преобладающий эффект': df.groupby('Объект')['Impact'].agg(
|
| 469 |
+
lambda x: x.value_counts().index[0] if len(x) > 0 else 'Неопределенный'
|
| 470 |
+
)
|
| 471 |
})
|
| 472 |
+
|
| 473 |
+
# Sort by number of negative mentions
|
| 474 |
+
summary_df = summary_df.sort_values('Негативные', ascending=False)
|
| 475 |
|
| 476 |
# Write 'Сводка' sheet
|
| 477 |
ws = wb['Сводка']
|
| 478 |
+
for r_idx, row in enumerate(dataframe_to_rows(summary_df, index=False, header=True), start=4):
|
| 479 |
for c_idx, value in enumerate(row, start=5):
|
| 480 |
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 481 |
|
| 482 |
+
# Update 'Значимые' sheet
|
|
|
|
| 483 |
significant_data = []
|
| 484 |
for _, row in df.iterrows():
|
| 485 |
+
if row['Sentiment'] in ['Negative', 'Positive']:
|
| 486 |
+
significant_data.append([
|
| 487 |
+
row['Объект'],
|
| 488 |
+
'релевантен',
|
| 489 |
+
row['Sentiment'],
|
| 490 |
+
row['Impact'],
|
| 491 |
+
row['Заголовок'],
|
| 492 |
+
row['Выдержки из текста']
|
| 493 |
+
])
|
| 494 |
|
|
|
|
| 495 |
ws = wb['Значимые']
|
| 496 |
for r_idx, row in enumerate(significant_data, start=3):
|
| 497 |
for c_idx, value in enumerate(row, start=3):
|
| 498 |
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 499 |
|
| 500 |
+
# Update 'Анализ' sheet
|
| 501 |
+
analysis_df = create_analysis_data(df)
|
| 502 |
ws = wb['Анализ']
|
| 503 |
+
for r_idx, row in enumerate(dataframe_to_rows(analysis_df, index=False, header=True), start=4):
|
| 504 |
for c_idx, value in enumerate(row, start=5):
|
| 505 |
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 506 |
|
|
|
|
| 519 |
for c_idx, value in enumerate(row, start=1):
|
| 520 |
ws.cell(row=r_idx, column=c_idx, value=value)
|
| 521 |
|
|
|
|
| 522 |
output = io.BytesIO()
|
| 523 |
wb.save(output)
|
| 524 |
output.seek(0)
|
|
|
|
| 525 |
return output
|
| 526 |
|
| 527 |
+
|
| 528 |
+
|
| 529 |
def generate_sentiment_visualization(df):
|
| 530 |
# Filter for negative sentiments
|
| 531 |
negative_df = df[df['Sentiment'] == 'Negative']
|
|
|
|
| 550 |
|
| 551 |
|
| 552 |
def main():
|
| 553 |
+
# Add custom CSS for the signature
|
| 554 |
+
st.markdown(
|
| 555 |
+
"""
|
| 556 |
+
<style>
|
| 557 |
+
.signature {
|
| 558 |
+
position: fixed;
|
| 559 |
+
right: 10px;
|
| 560 |
+
bottom: 10px;
|
| 561 |
+
font-size: 12px;
|
| 562 |
+
color: #666;
|
| 563 |
+
opacity: 0.7;
|
| 564 |
+
z-index: 999;
|
| 565 |
+
}
|
| 566 |
+
</style>
|
| 567 |
+
<div class="signature">denis.pokrovsky.npff</div>
|
| 568 |
+
""",
|
| 569 |
+
unsafe_allow_html=True
|
| 570 |
+
)
|
| 571 |
+
|
| 572 |
+
st.title("... приступим к анализу... версия 72")
|
| 573 |
|
| 574 |
# Initialize session state
|
| 575 |
if 'processed_df' not in st.session_state:
|