Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -193,6 +193,25 @@ def merge_metadata_with_generated_data(generated_data, excel_metadata):
|
|
| 193 |
generated_data[post_number] = post_data
|
| 194 |
return generated_data
|
| 195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
def generate_abstract(text):
|
| 197 |
try:
|
| 198 |
response = llm.chat([
|
|
@@ -316,20 +335,28 @@ with tabs[0]:
|
|
| 316 |
# Detailed Analysis Tab
|
| 317 |
# -------------------------------------------------------------------
|
| 318 |
with tabs[1]:
|
| 319 |
-
st.
|
| 320 |
-
uploaded_docx_analysis = st.file_uploader("Upload DOCX file", type=["docx"], key="detailed_docx")
|
| 321 |
|
| 322 |
-
|
| 323 |
-
|
|
|
|
| 324 |
total_posts = len(captions)
|
| 325 |
st.write(f"**Total number of posts:** {total_posts}")
|
| 326 |
|
| 327 |
language_counter = Counter()
|
|
|
|
|
|
|
| 328 |
hashtag_counter = Counter()
|
| 329 |
|
| 330 |
for post, text in captions.items():
|
| 331 |
lang = detect_language(text)
|
| 332 |
language_counter[lang] += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
hashtags = extract_hashtags(text)
|
| 334 |
for hashtag in hashtags:
|
| 335 |
hashtag_counter[hashtag] += 1
|
|
@@ -337,6 +364,13 @@ with tabs[1]:
|
|
| 337 |
st.subheader("Language Distribution")
|
| 338 |
st.write(dict(language_counter))
|
| 339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
st.subheader("Hashtag Distribution")
|
| 341 |
st.write(dict(hashtag_counter))
|
| 342 |
|
|
@@ -353,6 +387,14 @@ with tabs[1]:
|
|
| 353 |
for lang, count in language_counter.items():
|
| 354 |
doc.add_paragraph(f"{lang}: {count}")
|
| 355 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
doc.add_heading("Hashtag Distribution", level=1)
|
| 357 |
for hashtag, count in hashtag_counter.items():
|
| 358 |
doc.add_paragraph(f"{hashtag}: {count}")
|
|
@@ -364,17 +406,43 @@ with tabs[1]:
|
|
| 364 |
doc.save(docx_io)
|
| 365 |
docx_io.seek(0)
|
| 366 |
st.download_button("Download Analysis Summary as DOCX", data=docx_io, file_name="analysis_summary.docx")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
df_summary.to_excel(writer, sheet_name="Summary", index=False)
|
| 372 |
-
|
| 373 |
-
df_lang = pd.DataFrame(list(language_counter.items()), columns=["Language", "Count"])
|
| 374 |
-
df_lang.to_excel(writer, sheet_name="Languages", index=False)
|
| 375 |
-
|
| 376 |
-
df_hashtags = pd.DataFrame(list(hashtag_counter.items()), columns=["Hashtag", "Count"])
|
| 377 |
-
df_hashtags.to_excel(writer, sheet_name="Hashtags", index=False)
|
| 378 |
|
| 379 |
-
|
| 380 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
generated_data[post_number] = post_data
|
| 194 |
return generated_data
|
| 195 |
|
| 196 |
def extract_frame_focus(text, categories=None):
    """Classify how strongly each frame category is represented in *text*.

    Keyword matching is case-insensitive and counts how many of a frame's
    keywords occur at least once in the text. The best-matching frame is
    labeled "Major Focus", the next two "Significant Focus", any further
    matches "Minor Mention", and frames with no matching keyword at all
    "Not Applicable".

    Args:
        text: Caption/post text to scan.
        categories: Optional mapping of frame name -> list of keywords.
            Defaults to the module-level ``frame_categories``.

    Returns:
        dict: Every frame name from *categories* mapped to one of
        "Major Focus", "Significant Focus", "Minor Mention",
        or "Not Applicable".
    """
    if categories is None:
        categories = frame_categories  # module-level default keyword map
    text_lower = text.lower()
    # Number of this frame's keywords that appear at least once each.
    frame_freq = {
        frame: sum(1 for word in keywords if word in text_lower)
        for frame, keywords in categories.items()
    }
    # Only rank frames that actually matched something. Previously a frame
    # with a zero count could still be labeled "Major/Significant Focus"
    # (and the "Not Applicable" fallback below was unreachable).
    detected = sorted(
        ((frame, freq) for frame, freq in frame_freq.items() if freq > 0),
        key=lambda item: item[1],
        reverse=True,
    )
    frame_mapping_1 = {}
    if detected:
        frame_mapping_1[detected[0][0]] = "Major Focus"
        for frame, _ in detected[1:3]:
            frame_mapping_1[frame] = "Significant Focus"
        for frame, _ in detected[3:]:
            frame_mapping_1[frame] = "Minor Mention"
    # Frames with no keyword hits are explicitly marked not applicable.
    for frame in categories:
        frame_mapping_1.setdefault(frame, "Not Applicable")
    return frame_mapping_1
|
| 214 |
+
|
| 215 |
def generate_abstract(text):
|
| 216 |
try:
|
| 217 |
response = llm.chat([
|
|
|
|
| 335 |
# Detailed Analysis Tab
|
| 336 |
# -------------------------------------------------------------------
|
| 337 |
with tabs[1]:
|
| 338 |
+
st.title("Detailed DOCX Analysis")
|
|
|
|
| 339 |
|
| 340 |
+
uploaded_docx = st.file_uploader("Upload DOCX file", type=["docx"])
|
| 341 |
+
if uploaded_docx:
|
| 342 |
+
captions = extract_captions_from_docx(uploaded_docx)
|
| 343 |
total_posts = len(captions)
|
| 344 |
st.write(f"**Total number of posts:** {total_posts}")
|
| 345 |
|
| 346 |
language_counter = Counter()
|
| 347 |
+
tone_counter = Counter()
|
| 348 |
+
frame_counter = {frame: Counter() for frame in frame_categories.keys()}
|
| 349 |
hashtag_counter = Counter()
|
| 350 |
|
| 351 |
for post, text in captions.items():
|
| 352 |
lang = detect_language(text)
|
| 353 |
language_counter[lang] += 1
|
| 354 |
+
tones = extract_tone(text)
|
| 355 |
+
for tone in tones:
|
| 356 |
+
tone_counter[tone] += 1
|
| 357 |
+
frame_mapping_1 = extract_frame_focus(text)
|
| 358 |
+
for frame, category in frame_mapping_1.items():
|
| 359 |
+
frame_counter[frame][category] += 1
|
| 360 |
hashtags = extract_hashtags(text)
|
| 361 |
for hashtag in hashtags:
|
| 362 |
hashtag_counter[hashtag] += 1
|
|
|
|
| 364 |
st.subheader("Language Distribution")
|
| 365 |
st.write(dict(language_counter))
|
| 366 |
|
| 367 |
+
st.subheader("Tone Distribution")
|
| 368 |
+
st.write(dict(tone_counter))
|
| 369 |
+
|
| 370 |
+
st.subheader("Frame Distribution")
|
| 371 |
+
for frame, counts in frame_counter.items():
|
| 372 |
+
st.write(f"**{frame}:** {dict(counts)}")
|
| 373 |
+
|
| 374 |
st.subheader("Hashtag Distribution")
|
| 375 |
st.write(dict(hashtag_counter))
|
| 376 |
|
|
|
|
| 387 |
for lang, count in language_counter.items():
|
| 388 |
doc.add_paragraph(f"{lang}: {count}")
|
| 389 |
|
| 390 |
+
doc.add_heading("Tone Distribution", level=1)
|
| 391 |
+
for tone, count in tone_counter.items():
|
| 392 |
+
doc.add_paragraph(f"{tone}: {count}")
|
| 393 |
+
|
| 394 |
+
doc.add_heading("Frame Distribution", level=1)
|
| 395 |
+
for frame, counts in frame_counter.items():
|
| 396 |
+
doc.add_paragraph(f"{frame}: {dict(counts)}")
|
| 397 |
+
|
| 398 |
doc.add_heading("Hashtag Distribution", level=1)
|
| 399 |
for hashtag, count in hashtag_counter.items():
|
| 400 |
doc.add_paragraph(f"{hashtag}: {count}")
|
|
|
|
| 406 |
doc.save(docx_io)
|
| 407 |
docx_io.seek(0)
|
| 408 |
st.download_button("Download Analysis Summary as DOCX", data=docx_io, file_name="analysis_summary.docx")
|
| 409 |
# Build the Excel export entirely in memory so nothing is written to disk.
excel_io = io.BytesIO()

# NOTE: the context manager saves/closes the workbook on exit. Do NOT also
# call writer.save() inside the block — it was deprecated in pandas 1.5 and
# removed in pandas 2.0, where it raises at runtime.
with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
    # Language Distribution sheet
    df_language = pd.DataFrame(list(language_counter.items()), columns=["Language", "Count"])
    df_language.to_excel(writer, index=False, sheet_name="Language Distribution")

    # Tone Distribution sheet
    df_tone = pd.DataFrame(list(tone_counter.items()), columns=["Tone", "Count"])
    df_tone.to_excel(writer, index=False, sheet_name="Tone Distribution")

    # Frame Distribution sheet: flatten the nested {frame: Counter} mapping
    # into rows (one per frame) with one column per focus category.
    df_frame = (
        pd.DataFrame.from_dict(
            {frame: dict(counter) for frame, counter in frame_counter.items()},
            orient="index",
        )
        .fillna(0)
        .astype(int)
    )
    df_frame.reset_index(inplace=True)
    df_frame.rename(columns={"index": "Frame"}, inplace=True)
    df_frame.to_excel(writer, index=False, sheet_name="Frame Distribution")

    # Hashtag Distribution sheet
    df_hashtag = pd.DataFrame(list(hashtag_counter.items()), columns=["Hashtag", "Count"])
    df_hashtag.to_excel(writer, index=False, sheet_name="Hashtag Distribution")

    # Abstract & Recommendations sheet
    df_abstract = pd.DataFrame({"Abstract & Recommendations": [abstract]})
    df_abstract.to_excel(writer, index=False, sheet_name="Abstract")

excel_io.seek(0)  # rewind so the download serves the whole file

# Download button for the Excel file
st.download_button(
    label="Download Analysis Data as Excel",
    data=excel_io,
    file_name="analysis_data.xlsx",
    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)