Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -193,6 +193,17 @@ def merge_metadata_with_generated_data(generated_data, excel_metadata):
|
|
| 193 |
generated_data[post_number] = post_data
|
| 194 |
return generated_data
|
| 195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
def create_docx_from_data(extracted_data):
|
| 197 |
doc = Document()
|
| 198 |
for post_number, data in extracted_data.items():
|
|
@@ -306,127 +317,64 @@ with tabs[0]:
|
|
| 306 |
# -------------------------------------------------------------------
|
| 307 |
with tabs[1]:
|
| 308 |
st.header("Detailed Analysis of DOCX File")
|
| 309 |
-
uploaded_docx_analysis = st.file_uploader("Upload DOCX file
|
| 310 |
|
| 311 |
if uploaded_docx_analysis:
|
| 312 |
-
# Extract posts from the uploaded DOCX file
|
| 313 |
captions = extract_captions_from_docx(uploaded_docx_analysis)
|
| 314 |
total_posts = len(captions)
|
| 315 |
st.write(f"**Total number of posts:** {total_posts}")
|
| 316 |
|
| 317 |
-
# Initialize counters
|
| 318 |
language_counter = Counter()
|
| 319 |
-
|
| 320 |
-
# Prepare a counter dictionary for each frame category across posts
|
| 321 |
-
frame_overall_counter = {frame: Counter() for frame in frame_categories.keys()}
|
| 322 |
|
| 323 |
-
# Process each post
|
| 324 |
for post, text in captions.items():
|
| 325 |
-
# Count languages used
|
| 326 |
lang = detect_language(text)
|
| 327 |
language_counter[lang] += 1
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
for tone in tones:
|
| 332 |
-
tone_counter[tone] += 1
|
| 333 |
-
|
| 334 |
-
# Count frames by focus level (Major Focus, Significant Focus, Minor Mention, Not Applicable)
|
| 335 |
-
frame_mapping = get_frame_category_mapping(text)
|
| 336 |
-
for frame, category in frame_mapping.items():
|
| 337 |
-
frame_overall_counter[frame][category] += 1
|
| 338 |
|
| 339 |
-
|
| 340 |
-
st.subheader("Languages Detected")
|
| 341 |
st.write(dict(language_counter))
|
| 342 |
|
| 343 |
-
st.subheader("
|
| 344 |
-
st.write(dict(
|
| 345 |
|
| 346 |
-
st.subheader("Frame Usage Counts")
|
| 347 |
-
for frame, counts in frame_overall_counter.items():
|
| 348 |
-
st.write(f"**{frame}:** {dict(counts)}")
|
| 349 |
-
|
| 350 |
-
# Generate an abstract of the document with recommendations using Groq API
|
| 351 |
combined_text = " ".join(captions.values())
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
"and recommendations for improvement. Document text: " + combined_text
|
| 355 |
-
)
|
| 356 |
-
try:
|
| 357 |
-
response = llm.chat([
|
| 358 |
-
{"role": "system", "content": "Analyze document abstract and provide recommendations."},
|
| 359 |
-
{"role": "user", "content": prompt}
|
| 360 |
-
])
|
| 361 |
-
abstract = response["choices"][0]["message"]["content"]
|
| 362 |
-
except Exception as e:
|
| 363 |
-
logging.error(f"Groq API error during abstract generation: {e}")
|
| 364 |
-
st.error("Error generating abstract using Groq API.")
|
| 365 |
-
abstract = "Abstract generation failed."
|
| 366 |
-
|
| 367 |
-
st.subheader("Abstract and Recommendations")
|
| 368 |
st.write(abstract)
|
| 369 |
|
| 370 |
-
# ---------------------------------------------------------------
|
| 371 |
-
# Generate downloadable DOCX summary file
|
| 372 |
-
# ---------------------------------------------------------------
|
| 373 |
doc = Document()
|
| 374 |
-
doc.add_heading("
|
| 375 |
doc.add_paragraph(f"Total number of posts: {total_posts}")
|
| 376 |
|
| 377 |
-
doc.add_heading("
|
| 378 |
for lang, count in language_counter.items():
|
| 379 |
doc.add_paragraph(f"{lang}: {count}")
|
| 380 |
|
| 381 |
-
doc.add_heading("
|
| 382 |
-
for
|
| 383 |
-
doc.add_paragraph(f"{
|
| 384 |
|
| 385 |
-
doc.add_heading("
|
| 386 |
-
for frame, counts in frame_overall_counter.items():
|
| 387 |
-
doc.add_paragraph(f"{frame}: {dict(counts)}")
|
| 388 |
-
|
| 389 |
-
doc.add_heading("Abstract and Recommendations", level=1)
|
| 390 |
doc.add_paragraph(abstract)
|
| 391 |
|
| 392 |
-
# Prepare DOCX for download
|
| 393 |
docx_io = io.BytesIO()
|
| 394 |
doc.save(docx_io)
|
| 395 |
docx_io.seek(0)
|
| 396 |
-
st.download_button(
|
| 397 |
-
"Download Analysis Summary as DOCX",
|
| 398 |
-
data=docx_io,
|
| 399 |
-
file_name="detailed_analysis_summary.docx"
|
| 400 |
-
)
|
| 401 |
|
| 402 |
-
# ---------------------------------------------------------------
|
| 403 |
-
# Generate downloadable Excel summary file with multiple sheets
|
| 404 |
-
# ---------------------------------------------------------------
|
| 405 |
excel_io = io.BytesIO()
|
| 406 |
with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
|
| 407 |
-
# Summary sheet
|
| 408 |
df_summary = pd.DataFrame({"Metric": ["Total Posts"], "Value": [total_posts]})
|
| 409 |
df_summary.to_excel(writer, sheet_name="Summary", index=False)
|
| 410 |
|
| 411 |
-
# Languages sheet
|
| 412 |
df_lang = pd.DataFrame(list(language_counter.items()), columns=["Language", "Count"])
|
| 413 |
df_lang.to_excel(writer, sheet_name="Languages", index=False)
|
| 414 |
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
# Frames sheet: Break down each frame by focus level
|
| 420 |
-
frame_list = []
|
| 421 |
-
for frame, counts in frame_overall_counter.items():
|
| 422 |
-
for category, count in counts.items():
|
| 423 |
-
frame_list.append({"Frame": frame, "Category": category, "Count": count})
|
| 424 |
-
df_frame = pd.DataFrame(frame_list)
|
| 425 |
-
df_frame.to_excel(writer, sheet_name="Frames", index=False)
|
| 426 |
-
|
| 427 |
excel_io.seek(0)
|
| 428 |
-
st.download_button(
|
| 429 |
-
"Download Analysis Summary as Excel",
|
| 430 |
-
data=excel_io,
|
| 431 |
-
file_name="detailed_analysis_summary.xlsx"
|
| 432 |
-
)
|
|
|
|
| 193 |
generated_data[post_number] = post_data
|
| 194 |
return generated_data
|
| 195 |
|
| 196 |
+
def generate_abstract(text):
    """Request an abstract and recommendations for ``text`` from the LLM client.

    The module-level ``llm`` object (defined elsewhere in this file) is called
    with a fixed system instruction plus ``text`` as the user message.

    Args:
        text: Document text to summarise; sent unchanged as the user message.

    Returns:
        The ``content`` of the first choice in the chat response, or the
        string ``"Abstract generation failed."`` when ``text`` is blank or
        when the request / response handling raises.
    """
    failure_message = "Abstract generation failed."

    # Nothing to summarise: avoid a pointless API round-trip and reuse the
    # same sentinel callers already display on failure.
    if not text or not text.strip():
        logging.warning("Abstract generation skipped: empty document text.")
        return failure_message

    try:
        response = llm.chat([
            {"role": "system", "content": "Generate an abstract and recommendations for the following document."},
            {"role": "user", "content": text},
        ])
        return response["choices"][0]["message"]["content"]
    except Exception as e:
        # Deliberate catch-all at the UI boundary: any client or parsing
        # error degrades to the sentinel string instead of crashing the app.
        # logging.exception records the traceback along with the message.
        logging.exception("Groq API error: %s", e)
        return failure_message
|
| 206 |
+
|
| 207 |
def create_docx_from_data(extracted_data):
|
| 208 |
doc = Document()
|
| 209 |
for post_number, data in extracted_data.items():
|
|
|
|
| 317 |
# -------------------------------------------------------------------
|
| 318 |
with tabs[1]:
    # Second tab: per-document statistics plus downloadable DOCX/Excel reports.
    st.header("Detailed Analysis of DOCX File")
    uploaded_docx_analysis = st.file_uploader(
        "Upload DOCX file",
        type=["docx"],
        key="detailed_docx",
    )

    if uploaded_docx_analysis:
        captions = extract_captions_from_docx(uploaded_docx_analysis)
        total_posts = len(captions)
        st.write(f"**Total number of posts:** {total_posts}")

        # Tally the detected language and every hashtag occurrence per post.
        language_counter = Counter()
        hashtag_counter = Counter()
        for text in captions.values():
            language_counter[detect_language(text)] += 1
            for tag in extract_hashtags(text):
                hashtag_counter[tag] += 1

        # On-page report: one subheader + raw counts per distribution.
        for title, counter in (
            ("Language Distribution", language_counter),
            ("Hashtag Distribution", hashtag_counter),
        ):
            st.subheader(title)
            st.write(dict(counter))

        # NOTE(review): this presumably runs again on every Streamlit rerun
        # (including download clicks) -- confirm whether caching is wanted.
        combined_text = " ".join(captions.values())
        abstract = generate_abstract(combined_text)
        st.subheader("Abstract & Recommendations")
        st.write(abstract)

        # DOCX report mirroring the on-page sections.
        doc = Document()
        doc.add_heading("Analysis Summary", 0)
        doc.add_paragraph(f"Total number of posts: {total_posts}")

        for title, counter in (
            ("Language Distribution", language_counter),
            ("Hashtag Distribution", hashtag_counter),
        ):
            doc.add_heading(title, level=1)
            for label, count in counter.items():
                doc.add_paragraph(f"{label}: {count}")

        doc.add_heading("Abstract & Recommendations", level=1)
        doc.add_paragraph(abstract)

        docx_io = io.BytesIO()
        doc.save(docx_io)
        docx_io.seek(0)  # rewind so the download starts at the first byte
        st.download_button(
            "Download Analysis Summary as DOCX",
            data=docx_io,
            file_name="analysis_summary.docx",
        )

        # Excel report: a summary sheet plus one sheet per distribution.
        excel_io = io.BytesIO()
        with pd.ExcelWriter(excel_io, engine="xlsxwriter") as writer:
            pd.DataFrame(
                {"Metric": ["Total Posts"], "Value": [total_posts]}
            ).to_excel(writer, sheet_name="Summary", index=False)

            for sheet, key_column, counter in (
                ("Languages", "Language", language_counter),
                ("Hashtags", "Hashtag", hashtag_counter),
            ):
                pd.DataFrame(
                    list(counter.items()), columns=[key_column, "Count"]
                ).to_excel(writer, sheet_name=sheet, index=False)

        # The writer is closed at this point, so the buffer holds the workbook.
        excel_io.seek(0)
        st.download_button(
            "Download Analysis Summary as Excel",
            data=excel_io,
            file_name="analysis_summary.xlsx",
        )
|
|
|
|
|
|
|
|
|
|
|
|