ahm14 committed on
Commit
8106243
·
verified ·
1 Parent(s): b797eeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -84
app.py CHANGED
@@ -193,6 +193,17 @@ def merge_metadata_with_generated_data(generated_data, excel_metadata):
193
  generated_data[post_number] = post_data
194
  return generated_data
195
 
 
 
 
 
 
 
 
 
 
 
 
196
  def create_docx_from_data(extracted_data):
197
  doc = Document()
198
  for post_number, data in extracted_data.items():
@@ -306,127 +317,64 @@ with tabs[0]:
306
  # -------------------------------------------------------------------
307
  with tabs[1]:
308
  st.header("Detailed Analysis of DOCX File")
309
- uploaded_docx_analysis = st.file_uploader("Upload DOCX file for detailed analysis", type=["docx"], key="detailed_docx")
310
 
311
  if uploaded_docx_analysis:
312
- # Extract posts from the uploaded DOCX file
313
  captions = extract_captions_from_docx(uploaded_docx_analysis)
314
  total_posts = len(captions)
315
  st.write(f"**Total number of posts:** {total_posts}")
316
 
317
- # Initialize counters
318
  language_counter = Counter()
319
- tone_counter = Counter()
320
- # Prepare a counter dictionary for each frame category across posts
321
- frame_overall_counter = {frame: Counter() for frame in frame_categories.keys()}
322
 
323
- # Process each post
324
  for post, text in captions.items():
325
- # Count languages used
326
  lang = detect_language(text)
327
  language_counter[lang] += 1
328
-
329
- # Count tones
330
- tones = extract_tone(text)
331
- for tone in tones:
332
- tone_counter[tone] += 1
333
-
334
- # Count frames by focus level (Major Focus, Significant Focus, Minor Mention, Not Applicable)
335
- frame_mapping = get_frame_category_mapping(text)
336
- for frame, category in frame_mapping.items():
337
- frame_overall_counter[frame][category] += 1
338
 
339
- # Display aggregated statistics
340
- st.subheader("Languages Detected")
341
  st.write(dict(language_counter))
342
 
343
- st.subheader("Tone Counts")
344
- st.write(dict(tone_counter))
345
 
346
- st.subheader("Frame Usage Counts")
347
- for frame, counts in frame_overall_counter.items():
348
- st.write(f"**{frame}:** {dict(counts)}")
349
-
350
- # Generate an abstract of the document with recommendations using Groq API
351
  combined_text = " ".join(captions.values())
352
- prompt = (
353
- "Generate an abstract of the document along with possible reasons behind the observed patterns "
354
- "and recommendations for improvement. Document text: " + combined_text
355
- )
356
- try:
357
- response = llm.chat([
358
- {"role": "system", "content": "Analyze document abstract and provide recommendations."},
359
- {"role": "user", "content": prompt}
360
- ])
361
- abstract = response["choices"][0]["message"]["content"]
362
- except Exception as e:
363
- logging.error(f"Groq API error during abstract generation: {e}")
364
- st.error("Error generating abstract using Groq API.")
365
- abstract = "Abstract generation failed."
366
-
367
- st.subheader("Abstract and Recommendations")
368
  st.write(abstract)
369
 
370
- # ---------------------------------------------------------------
371
- # Generate downloadable DOCX summary file
372
- # ---------------------------------------------------------------
373
  doc = Document()
374
- doc.add_heading("Detailed Analysis Summary", 0)
375
  doc.add_paragraph(f"Total number of posts: {total_posts}")
376
 
377
- doc.add_heading("Languages Detected", level=1)
378
  for lang, count in language_counter.items():
379
  doc.add_paragraph(f"{lang}: {count}")
380
 
381
- doc.add_heading("Tone Counts", level=1)
382
- for tone, count in tone_counter.items():
383
- doc.add_paragraph(f"{tone}: {count}")
384
 
385
- doc.add_heading("Frame Usage Counts", level=1)
386
- for frame, counts in frame_overall_counter.items():
387
- doc.add_paragraph(f"{frame}: {dict(counts)}")
388
-
389
- doc.add_heading("Abstract and Recommendations", level=1)
390
  doc.add_paragraph(abstract)
391
 
392
- # Prepare DOCX for download
393
  docx_io = io.BytesIO()
394
  doc.save(docx_io)
395
  docx_io.seek(0)
396
- st.download_button(
397
- "Download Analysis Summary as DOCX",
398
- data=docx_io,
399
- file_name="detailed_analysis_summary.docx"
400
- )
401
 
402
- # ---------------------------------------------------------------
403
- # Generate downloadable Excel summary file with multiple sheets
404
- # ---------------------------------------------------------------
405
  excel_io = io.BytesIO()
406
  with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
407
- # Summary sheet
408
  df_summary = pd.DataFrame({"Metric": ["Total Posts"], "Value": [total_posts]})
409
  df_summary.to_excel(writer, sheet_name="Summary", index=False)
410
 
411
- # Languages sheet
412
  df_lang = pd.DataFrame(list(language_counter.items()), columns=["Language", "Count"])
413
  df_lang.to_excel(writer, sheet_name="Languages", index=False)
414
 
415
- # Tones sheet
416
- df_tone = pd.DataFrame(list(tone_counter.items()), columns=["Tone", "Count"])
417
- df_tone.to_excel(writer, sheet_name="Tones", index=False)
418
-
419
- # Frames sheet: Break down each frame by focus level
420
- frame_list = []
421
- for frame, counts in frame_overall_counter.items():
422
- for category, count in counts.items():
423
- frame_list.append({"Frame": frame, "Category": category, "Count": count})
424
- df_frame = pd.DataFrame(frame_list)
425
- df_frame.to_excel(writer, sheet_name="Frames", index=False)
426
-
427
  excel_io.seek(0)
428
- st.download_button(
429
- "Download Analysis Summary as Excel",
430
- data=excel_io,
431
- file_name="detailed_analysis_summary.xlsx"
432
- )
 
193
  generated_data[post_number] = post_data
194
  return generated_data
195
 
196
def generate_abstract(text):
    """Ask the LLM for an abstract and recommendations for a document.

    Sends ``text`` as the user message to the module-level ``llm`` client,
    preceded by a system instruction, and returns the message content of the
    first choice in the response.

    Args:
        text: Full document text to summarise.

    Returns:
        The generated abstract, or the placeholder string
        ``"Abstract generation failed."`` when ``text`` is blank or when the
        request or the response lookup raises.
    """
    # A blank document gives the model nothing to summarise, so skip the
    # remote call rather than spend a request on it.
    if not text or not text.strip():
        logging.warning("Abstract generation skipped: document text is empty.")
        return "Abstract generation failed."

    messages = [
        {"role": "system", "content": "Generate an abstract and recommendations for the following document."},
        {"role": "user", "content": text},
    ]
    try:
        response = llm.chat(messages)
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a string back.
        # logging.exception keeps the traceback, which the plain error
        # message alone would lose.
        logging.exception("Groq API error: %s", e)
        return "Abstract generation failed."
206
+
207
  def create_docx_from_data(extracted_data):
208
  doc = Document()
209
  for post_number, data in extracted_data.items():
 
317
  # -------------------------------------------------------------------
with tabs[1]:
    # Detailed-analysis tab: per-document language and hashtag statistics, an
    # LLM-generated abstract, and DOCX / Excel downloads of the summary.
    st.header("Detailed Analysis of DOCX File")
    uploaded_docx_analysis = st.file_uploader("Upload DOCX file", type=["docx"], key="detailed_docx")

    if uploaded_docx_analysis:
        captions = extract_captions_from_docx(uploaded_docx_analysis)
        total_posts = len(captions)
        st.write(f"**Total number of posts:** {total_posts}")

        if not captions:
            # Nothing to analyse: avoid an LLM call on empty text and do not
            # offer empty summary files for download.
            st.warning("No posts were found in the uploaded DOCX file.")
        else:
            language_counter = Counter()
            hashtag_counter = Counter()

            # Only the post text is needed here, so iterate the values.
            for text in captions.values():
                language_counter[detect_language(text)] += 1
                for hashtag in extract_hashtags(text):
                    hashtag_counter[hashtag] += 1

            st.subheader("Language Distribution")
            st.write(dict(language_counter))

            st.subheader("Hashtag Distribution")
            st.write(dict(hashtag_counter))

            combined_text = " ".join(captions.values())

            # Streamlit reruns the whole script on every interaction, including
            # a click on a download button. Remember the abstract for this exact
            # text so the LLM is not called again on each rerun.
            cached = st.session_state.get("detailed_docx_abstract")
            if cached is not None and cached[0] == combined_text:
                abstract = cached[1]
            else:
                abstract = generate_abstract(combined_text)
                # Do not pin a failure placeholder: the next rerun should retry.
                if abstract != "Abstract generation failed.":
                    st.session_state["detailed_docx_abstract"] = (combined_text, abstract)

            st.subheader("Abstract & Recommendations")
            st.write(abstract)

            # ---- DOCX summary ------------------------------------------------
            doc = Document()
            doc.add_heading("Analysis Summary", 0)
            doc.add_paragraph(f"Total number of posts: {total_posts}")

            doc.add_heading("Language Distribution", level=1)
            for lang, count in language_counter.items():
                doc.add_paragraph(f"{lang}: {count}")

            doc.add_heading("Hashtag Distribution", level=1)
            for hashtag, count in hashtag_counter.items():
                doc.add_paragraph(f"{hashtag}: {count}")

            doc.add_heading("Abstract & Recommendations", level=1)
            doc.add_paragraph(abstract)

            docx_io = io.BytesIO()
            doc.save(docx_io)
            st.download_button(
                "Download Analysis Summary as DOCX",
                data=docx_io.getvalue(),
                file_name="analysis_summary.docx",
                # Explicit MIME type so the browser does not fall back to a
                # generic binary download.
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )

            # ---- Excel summary (one sheet per table) -------------------------
            excel_io = io.BytesIO()
            with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
                df_summary = pd.DataFrame({"Metric": ["Total Posts"], "Value": [total_posts]})
                df_summary.to_excel(writer, sheet_name="Summary", index=False)

                df_lang = pd.DataFrame(list(language_counter.items()), columns=["Language", "Count"])
                df_lang.to_excel(writer, sheet_name="Languages", index=False)

                df_hashtags = pd.DataFrame(list(hashtag_counter.items()), columns=["Hashtag", "Count"])
                df_hashtags.to_excel(writer, sheet_name="Hashtags", index=False)

            # The workbook is only complete once the writer context has closed.
            st.download_button(
                "Download Analysis Summary as Excel",
                data=excel_io.getvalue(),
                file_name="analysis_summary.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )