Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -93,32 +93,19 @@ def extract_frames(text):
|
|
| 93 |
try:
|
| 94 |
response = llm.chat([{"role": "system", "content": "Classify the following text into relevant activism frames and assign Major, Significant, or Minor focus."},
|
| 95 |
{"role": "user", "content": text}])
|
| 96 |
-
return
|
| 97 |
except Exception as e:
|
| 98 |
logging.error(f"Groq API error: {e}")
|
| 99 |
return extract_frames_fallback(text)
|
| 100 |
|
| 101 |
-
# Categorize frame focus: Major, Significant, Minor
|
| 102 |
-
def categorize_frame_focus(frames_text):
    """Map each frame in a comma-separated string to a focus label.

    The text is split on ", ". A fragment containing "Major" is labelled
    "Major Focus", one containing "Significant" is labelled
    "Significant Focus", and anything else is labelled "Minor Mention".
    The checks are case-sensitive and "Major" takes precedence when both
    words appear in the same fragment.

    Args:
        frames_text: Frames separated by ", ". May be empty or None.

    Returns:
        dict: Fragment text (unmodified) mapped to its focus label. Empty
        when there is nothing to categorize.
    """
    if not frames_text:
        return {}

    frame_data = {}
    for frame in frames_text.split(", "):
        # "".split(", ") yields [""]; without this guard an empty or blank
        # input would produce a bogus {"": "Minor Mention"} entry.
        if not frame.strip():
            continue
        if "Major" in frame:
            frame_data[frame] = "Major Focus"
        elif "Significant" in frame:
            frame_data[frame] = "Significant Focus"
        else:
            frame_data[frame] = "Minor Mention"
    return frame_data
|
| 113 |
-
|
| 114 |
# Fallback method for frame extraction
|
| 115 |
def extract_frames_fallback(text):
|
| 116 |
-
detected_frames =
|
| 117 |
text_lower = text.lower()
|
| 118 |
for category, keywords in frame_categories.items():
|
| 119 |
if any(word in text_lower for word in keywords):
|
| 120 |
-
detected_frames
|
| 121 |
-
return detected_frames
|
| 122 |
|
| 123 |
# Extract captions from DOCX
|
| 124 |
def extract_captions_from_docx(docx_file):
|
|
@@ -134,14 +121,44 @@ def extract_captions_from_docx(docx_file):
|
|
| 134 |
captions[current_post].append(text)
|
| 135 |
return {post: " ".join(lines) for post, lines in captions.items() if lines}
|
| 136 |
|
| 137 |
-
#
|
| 138 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
doc = Document()
|
| 140 |
-
for
|
| 141 |
-
doc.add_heading(
|
| 142 |
-
for key, value in
|
| 143 |
doc.add_paragraph(f"{key}: {value}")
|
| 144 |
-
doc.add_paragraph() # Add
|
| 145 |
return doc
|
| 146 |
|
| 147 |
# Streamlit app
|
|
@@ -198,15 +215,16 @@ if uploaded_excel:
|
|
| 198 |
if output_data:
|
| 199 |
st.write(output_data)
|
| 200 |
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
|
|
|
| 206 |
|
| 207 |
st.download_button(
|
| 208 |
-
label="Download Extracted Data",
|
| 209 |
-
data=
|
| 210 |
file_name="extracted_data.docx",
|
| 211 |
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
| 212 |
)
|
|
|
|
| 93 |
try:
|
| 94 |
response = llm.chat([{"role": "system", "content": "Classify the following text into relevant activism frames and assign Major, Significant, or Minor focus."},
|
| 95 |
{"role": "user", "content": text}])
|
| 96 |
+
return response["choices"][0]["message"]["content"]
|
| 97 |
except Exception as e:
|
| 98 |
logging.error(f"Groq API error: {e}")
|
| 99 |
return extract_frames_fallback(text)
|
| 100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
# Fallback method for frame extraction
|
| 102 |
def extract_frames_fallback(text):
    """Detect frames by keyword matching, without calling the LLM.

    Lower-cases ``text`` and reports every category of the module-level
    ``frame_categories`` mapping that has at least one keyword occurring
    as a substring of the lower-cased text.

    Args:
        text: Text to scan.

    Returns:
        list: Matching category names, in the iteration order of
        ``frame_categories`` so the result is the same on every run.
    """
    text_lower = text.lower()
    # Mapping keys are already unique, so no set is needed for de-duplication;
    # building the list directly avoids the arbitrary ordering of list(set).
    # NOTE(review): keywords are compared as-is against lower-cased text, so
    # they are presumably stored in lower case -- confirm against the table.
    return [
        category
        for category, keywords in frame_categories.items()
        if any(word in text_lower for word in keywords)
    ]
|
| 109 |
|
| 110 |
# Extract captions from DOCX
|
| 111 |
def extract_captions_from_docx(docx_file):
|
|
|
|
| 121 |
captions[current_post].append(text)
|
| 122 |
return {post: " ".join(lines) for post, lines in captions.items() if lines}
|
| 123 |
|
| 124 |
+
# Function to extract metadata from an Excel file
|
| 125 |
+
def extract_metadata_from_excel(excel_file):
    """Read per-post metadata from an Excel sheet.

    Every row of the sheet becomes one record. Rows are labelled
    "Post 1", "Post 2", ... in sheet order.

    Args:
        excel_file: Anything ``pd.read_excel`` accepts (path or file-like).

    Returns:
        list[dict]: One dict per row with the keys "Post Number",
        "Date of Post", "Media Type", "Number of Pictures",
        "Number of Videos", "Number of Audios", "Likes" and "Comments".
        A field falls back to its default ("N/A" or 0) when the column is
        absent or the cell is empty.
    """
    df = pd.read_excel(excel_file)

    # (output key, source column, fallback value)
    fields = (
        ("Date of Post", "Date", "N/A"),
        ("Media Type", "Media Type", "N/A"),
        ("Number of Pictures", "Number of Pictures", 0),
        ("Number of Videos", "Number of Videos", 0),
        ("Number of Audios", "Number of Audios", 0),
        ("Likes", "Likes", 0),
        ("Comments", "Comments", 0),
    )

    extracted_data = []
    # Number rows by position; with the default index produced by
    # read_excel this matches the previous ``index + 1`` labelling.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        post_data = {"Post Number": f"Post {position}"}
        for out_key, column, default in fields:
            value = row.get(column, default)
            # row.get only falls back when the column is missing; an empty
            # cell comes through as NaN/NaT, so apply the default here too.
            # The is_scalar guard keeps pd.isna from returning an array.
            if pd.api.types.is_scalar(value) and pd.isna(value):
                value = default
            post_data[out_key] = value
        extracted_data.append(post_data)

    return extracted_data
|
| 143 |
+
|
| 144 |
+
# Merge metadata from Excel with the generated data
|
| 145 |
+
def merge_metadata_with_generated_data(generated_data, excel_metadata):
    """Fold Excel metadata records into the generated per-post data.

    For each record, the entry of ``generated_data`` keyed by the record's
    "Post Number" is updated with the record's fields; metadata values win
    on key collisions. Posts that have no generated entry yet get a new one.

    Args:
        generated_data: Mapping of post label to a dict of fields. It is
            updated in place.
        excel_metadata: Iterable of dicts, each containing "Post Number".

    Returns:
        The same ``generated_data`` object that was passed in.

    Raises:
        KeyError: If a record has no "Post Number" key.
    """
    for post_data in excel_metadata:
        post_number = post_data["Post Number"]
        # setdefault creates a fresh dict for unseen posts, so the merged
        # entry never aliases the caller's metadata record; later edits to
        # the merged data cannot leak back into excel_metadata.
        generated_data.setdefault(post_number, {}).update(post_data)
    return generated_data
|
| 153 |
+
|
| 154 |
+
# Function to create DOCX from extracted data
|
| 155 |
+
def create_docx_from_data(extracted_data):
    """Render the per-post data as a Word document.

    Each key of ``extracted_data`` becomes a level-1 heading, followed by
    one "key: value" paragraph per field of that post and a spacer
    paragraph.

    Args:
        extracted_data: Mapping of post label to a mapping of field name
            to value.

    Returns:
        The populated ``Document``; saving it is left to the caller.
    """
    doc = Document()
    for post_number in extracted_data:
        doc.add_heading(post_number, level=1)
        fields = extracted_data[post_number]
        for key in fields:
            value = fields[key]
            doc.add_paragraph(f"{key}: {value}")
        # Spacer so consecutive posts do not run together.
        # NOTE(review): placed once per post, as the original comment
        # ("between posts") suggests -- confirm the intended nesting.
        doc.add_paragraph("\n")
    return doc
|
| 163 |
|
| 164 |
# Streamlit app
|
|
|
|
| 215 |
if output_data:
|
| 216 |
st.write(output_data)
|
| 217 |
|
| 218 |
+
# Create DOCX file for download
|
| 219 |
+
if output_data:
|
| 220 |
+
doc = create_docx_from_data(output_data)
|
| 221 |
+
docx_io = io.BytesIO()
|
| 222 |
+
doc.save(docx_io)
|
| 223 |
+
docx_io.seek(0)
|
| 224 |
|
| 225 |
st.download_button(
|
| 226 |
+
label="Download Extracted Data as DOCX",
|
| 227 |
+
data=docx_io,
|
| 228 |
file_name="extracted_data.docx",
|
| 229 |
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
| 230 |
)
|