ahm14 committed on
Commit
6652627
·
verified ·
1 Parent(s): 8106243

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -16
app.py CHANGED
@@ -193,6 +193,25 @@ def merge_metadata_with_generated_data(generated_data, excel_metadata):
193
  generated_data[post_number] = post_data
194
  return generated_data
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  def generate_abstract(text):
197
  try:
198
  response = llm.chat([
@@ -316,20 +335,28 @@ with tabs[0]:
316
  # Detailed Analysis Tab
317
  # -------------------------------------------------------------------
318
  with tabs[1]:
319
- st.header("Detailed Analysis of DOCX File")
320
- uploaded_docx_analysis = st.file_uploader("Upload DOCX file", type=["docx"], key="detailed_docx")
321
 
322
- if uploaded_docx_analysis:
323
- captions = extract_captions_from_docx(uploaded_docx_analysis)
 
324
  total_posts = len(captions)
325
  st.write(f"**Total number of posts:** {total_posts}")
326
 
327
  language_counter = Counter()
 
 
328
  hashtag_counter = Counter()
329
 
330
  for post, text in captions.items():
331
  lang = detect_language(text)
332
  language_counter[lang] += 1
 
 
 
 
 
 
333
  hashtags = extract_hashtags(text)
334
  for hashtag in hashtags:
335
  hashtag_counter[hashtag] += 1
@@ -337,6 +364,13 @@ with tabs[1]:
337
  st.subheader("Language Distribution")
338
  st.write(dict(language_counter))
339
 
 
 
 
 
 
 
 
340
  st.subheader("Hashtag Distribution")
341
  st.write(dict(hashtag_counter))
342
 
@@ -353,6 +387,14 @@ with tabs[1]:
353
  for lang, count in language_counter.items():
354
  doc.add_paragraph(f"{lang}: {count}")
355
 
 
 
 
 
 
 
 
 
356
  doc.add_heading("Hashtag Distribution", level=1)
357
  for hashtag, count in hashtag_counter.items():
358
  doc.add_paragraph(f"{hashtag}: {count}")
@@ -364,17 +406,43 @@ with tabs[1]:
364
  doc.save(docx_io)
365
  docx_io.seek(0)
366
  st.download_button("Download Analysis Summary as DOCX", data=docx_io, file_name="analysis_summary.docx")
 
 
 
 
 
 
 
 
367
 
368
- excel_io = io.BytesIO()
369
- with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
370
- df_summary = pd.DataFrame({"Metric": ["Total Posts"], "Value": [total_posts]})
371
- df_summary.to_excel(writer, sheet_name="Summary", index=False)
372
-
373
- df_lang = pd.DataFrame(list(language_counter.items()), columns=["Language", "Count"])
374
- df_lang.to_excel(writer, sheet_name="Languages", index=False)
375
-
376
- df_hashtags = pd.DataFrame(list(hashtag_counter.items()), columns=["Hashtag", "Count"])
377
- df_hashtags.to_excel(writer, sheet_name="Hashtags", index=False)
378
 
379
- excel_io.seek(0)
380
- st.download_button("Download Analysis Summary as Excel", data=excel_io, file_name="analysis_summary.xlsx")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  generated_data[post_number] = post_data
194
  return generated_data
195
 
196
def extract_frame_focus(text, categories=None):
    """Label every frame with how prominently it appears in *text*.

    A frame's score is the number of its keywords that occur as substrings
    of the lower-cased text. Frames with at least one hit are ranked by
    score (ties keep the order of ``categories``): the top frame is
    "Major Focus", the next two are "Significant Focus", and any further
    matching frames are "Minor Mention". Frames with no hit at all are
    "Not Applicable".

    Args:
        text: The post/caption text to analyse.
        categories: Optional mapping of frame name -> iterable of keyword
            strings. Defaults to the module-level ``frame_categories``.

    Returns:
        dict mapping every frame name in ``categories`` to one of
        "Major Focus", "Significant Focus", "Minor Mention" or
        "Not Applicable".
    """
    if categories is None:
        categories = frame_categories

    text_lower = text.lower()
    # Keywords are lower-cased too so the comparison is case-insensitive on
    # both sides (a no-op when the keyword lists are already lower-case).
    frame_freq = {
        frame: sum(1 for word in keywords if word.lower() in text_lower)
        for frame, keywords in categories.items()
    }

    # Only frames that were actually mentioned take part in the ranking;
    # otherwise a text with no matches would still get a "Major Focus" frame
    # and "Not Applicable" could never be assigned.
    detected = sorted(
        (frame for frame, freq in frame_freq.items() if freq > 0),
        key=frame_freq.get,
        reverse=True,
    )

    frame_mapping = {}
    for rank, frame in enumerate(detected):
        if rank == 0:
            frame_mapping[frame] = "Major Focus"
        elif rank < 3:
            frame_mapping[frame] = "Significant Focus"
        else:
            frame_mapping[frame] = "Minor Mention"

    # Everything that was not mentioned is explicitly marked as such.
    for frame in categories:
        frame_mapping.setdefault(frame, "Not Applicable")
    return frame_mapping
214
+
215
  def generate_abstract(text):
216
  try:
217
  response = llm.chat([
 
335
  # Detailed Analysis Tab
336
  # -------------------------------------------------------------------
337
  with tabs[1]:
338
+ st.title("Detailed DOCX Analysis")
 
339
 
340
+ uploaded_docx = st.file_uploader("Upload DOCX file", type=["docx"])
341
+ if uploaded_docx:
342
+ captions = extract_captions_from_docx(uploaded_docx)
343
  total_posts = len(captions)
344
  st.write(f"**Total number of posts:** {total_posts}")
345
 
346
  language_counter = Counter()
347
+ tone_counter = Counter()
348
+ frame_counter = {frame: Counter() for frame in frame_categories.keys()}
349
  hashtag_counter = Counter()
350
 
351
  for post, text in captions.items():
352
  lang = detect_language(text)
353
  language_counter[lang] += 1
354
+ tones = extract_tone(text)
355
+ for tone in tones:
356
+ tone_counter[tone] += 1
357
+ frame_mapping_1 = extract_frame_focus(text)
358
+ for frame, category in frame_mapping_1.items():
359
+ frame_counter[frame][category] += 1
360
  hashtags = extract_hashtags(text)
361
  for hashtag in hashtags:
362
  hashtag_counter[hashtag] += 1
 
364
  st.subheader("Language Distribution")
365
  st.write(dict(language_counter))
366
 
367
+ st.subheader("Tone Distribution")
368
+ st.write(dict(tone_counter))
369
+
370
+ st.subheader("Frame Distribution")
371
+ for frame, counts in frame_counter.items():
372
+ st.write(f"**{frame}:** {dict(counts)}")
373
+
374
  st.subheader("Hashtag Distribution")
375
  st.write(dict(hashtag_counter))
376
 
 
387
  for lang, count in language_counter.items():
388
  doc.add_paragraph(f"{lang}: {count}")
389
 
390
+ doc.add_heading("Tone Distribution", level=1)
391
+ for tone, count in tone_counter.items():
392
+ doc.add_paragraph(f"{tone}: {count}")
393
+
394
+ doc.add_heading("Frame Distribution", level=1)
395
+ for frame, counts in frame_counter.items():
396
+ doc.add_paragraph(f"{frame}: {dict(counts)}")
397
+
398
  doc.add_heading("Hashtag Distribution", level=1)
399
  for hashtag, count in hashtag_counter.items():
400
  doc.add_paragraph(f"{hashtag}: {count}")
 
406
  doc.save(docx_io)
407
  docx_io.seek(0)
408
  st.download_button("Download Analysis Summary as DOCX", data=docx_io, file_name="analysis_summary.docx")
409
+
410
# Build the Excel workbook entirely in memory so it can be offered as a
# download without touching the filesystem.
excel_io = io.BytesIO()

# The context manager writes and closes the workbook when the block exits.
# Do not call writer.save() here: it is redundant inside `with`, and the
# method was removed in pandas 2.0 (it would raise AttributeError).
with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
    # Language Distribution sheet
    df_language = pd.DataFrame(list(language_counter.items()), columns=["Language", "Count"])
    df_language.to_excel(writer, index=False, sheet_name="Language Distribution")

    # Tone Distribution sheet
    df_tone = pd.DataFrame(list(tone_counter.items()), columns=["Tone", "Count"])
    df_tone.to_excel(writer, index=False, sheet_name="Tone Distribution")

    # Frame Distribution sheet: one row per frame, one column per focus
    # category; combinations that never occurred are filled with 0.
    df_frame = pd.DataFrame.from_dict(
        {frame: dict(counter) for frame, counter in frame_counter.items()},
        orient="index",
    ).fillna(0).astype(int)
    df_frame.reset_index(inplace=True)
    df_frame.rename(columns={"index": "Frame"}, inplace=True)
    df_frame.to_excel(writer, index=False, sheet_name="Frame Distribution")

    # Hashtag Distribution sheet
    df_hashtag = pd.DataFrame(list(hashtag_counter.items()), columns=["Hashtag", "Count"])
    df_hashtag.to_excel(writer, index=False, sheet_name="Hashtag Distribution")

    # Abstract & Recommendations sheet
    # NOTE(review): `abstract` is not assigned in the visible part of this
    # diff; presumably it is set earlier in this tab - confirm.
    df_abstract = pd.DataFrame({"Abstract & Recommendations": [abstract]})
    df_abstract.to_excel(writer, index=False, sheet_name="Abstract")

# Rewind so the download button reads the workbook from the first byte.
excel_io.seek(0)

# Download button for the Excel file
st.download_button(
    label="Download Analysis Data as Excel",
    data=excel_io,
    file_name="analysis_data.xlsx",
    mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
)
448
+