ahm14 committed on
Commit
9e0b8b3
·
verified ·
1 Parent(s): cf0ede7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -30
app.py CHANGED
@@ -93,32 +93,19 @@ def extract_frames(text):
93
  try:
94
  response = llm.chat([{"role": "system", "content": "Classify the following text into relevant activism frames and assign Major, Significant, or Minor focus."},
95
  {"role": "user", "content": text}])
96
- return categorize_frame_focus(response["choices"][0]["message"]["content"])
97
  except Exception as e:
98
  logging.error(f"Groq API error: {e}")
99
  return extract_frames_fallback(text)
100
 
101
- # Categorize frame focus: Major, Significant, Minor
102
- def categorize_frame_focus(frames_text):
103
- frame_data = {}
104
- frames = frames_text.split(", ")
105
- for frame in frames:
106
- if "Major" in frame:
107
- frame_data[frame] = "Major Focus"
108
- elif "Significant" in frame:
109
- frame_data[frame] = "Significant Focus"
110
- else:
111
- frame_data[frame] = "Minor Mention"
112
- return frame_data
113
-
114
  # Fallback method for frame extraction
115
  def extract_frames_fallback(text):
116
- detected_frames = {}
117
  text_lower = text.lower()
118
  for category, keywords in frame_categories.items():
119
  if any(word in text_lower for word in keywords):
120
- detected_frames[category] = "Minor Mention"
121
- return detected_frames
122
 
123
  # Extract captions from DOCX
124
  def extract_captions_from_docx(docx_file):
@@ -134,14 +121,44 @@ def extract_captions_from_docx(docx_file):
134
  captions[current_post].append(text)
135
  return {post: " ".join(lines) for post, lines in captions.items() if lines}
136
 
137
- # Generate DOCX file for download
138
- def generate_docx(data):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  doc = Document()
140
- for post, content in data.items():
141
- doc.add_heading(post, level=1)
142
- for key, value in content.items():
143
  doc.add_paragraph(f"{key}: {value}")
144
- doc.add_paragraph() # Add space between posts
145
  return doc
146
 
147
  # Streamlit app
@@ -198,15 +215,16 @@ if uploaded_excel:
198
  if output_data:
199
  st.write(output_data)
200
 
201
- # Generate DOCX for download
202
- doc = generate_docx(output_data)
203
- doc_io = io.BytesIO()
204
- doc.save(doc_io)
205
- doc_io.seek(0)
 
206
 
207
  st.download_button(
208
- label="Download Extracted Data",
209
- data=doc_io,
210
  file_name="extracted_data.docx",
211
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
212
  )
 
93
  try:
94
  response = llm.chat([{"role": "system", "content": "Classify the following text into relevant activism frames and assign Major, Significant, or Minor focus."},
95
  {"role": "user", "content": text}])
96
+ return response["choices"][0]["message"]["content"]
97
  except Exception as e:
98
  logging.error(f"Groq API error: {e}")
99
  return extract_frames_fallback(text)
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
# Fallback method for frame extraction
def extract_frames_fallback(text):
    """Keyword-based frame detection used when the Groq LLM call fails.

    Scans *text* case-insensitively against the module-level
    ``frame_categories`` mapping ({category: [keywords, ...]}) and returns
    the list of category names with at least one keyword hit.

    Fix: the original built a ``set`` and returned ``list(detected_frames)``,
    whose ordering is arbitrary and varies across interpreter runs. Building
    the list directly in ``frame_categories`` iteration order makes the
    output deterministic; duplicates are impossible because each category
    is visited exactly once.
    """
    text_lower = text.lower()  # normalize once, outside the loop
    return [
        category
        for category, keywords in frame_categories.items()
        if any(word in text_lower for word in keywords)
    ]
109
 
110
  # Extract captions from DOCX
111
  def extract_captions_from_docx(docx_file):
 
121
  captions[current_post].append(text)
122
  return {post: " ".join(lines) for post, lines in captions.items() if lines}
123
 
124
# Function to extract metadata from an Excel file
def extract_metadata_from_excel(excel_file):
    """Read one spreadsheet row per post and return a list of metadata dicts.

    Parameters:
        excel_file: path or file-like object accepted by ``pd.read_excel``.

    Returns:
        list[dict]: one dict per row with a synthetic ``"Post Number"``
        ("Post 1", "Post 2", ...) plus the date/media/engagement columns.

    Fix: ``row.get(col, default)`` only applies the default when the whole
    column is absent — a present column with an empty (NaN) cell leaked NaN
    into the output. ``_cell`` now maps NaN to the default as well. Post
    numbering uses a positional counter instead of the raw ``iterrows``
    label, so a non-default index no longer produces wrong/failing labels
    (behavior is identical for the default RangeIndex).
    """
    df = pd.read_excel(excel_file)

    def _cell(row, column, default):
        # Series.get only defaults on a missing column; also map NaN cells.
        value = row.get(column, default)
        return default if pd.isna(value) else value

    extracted_data = []
    for position, (_, row) in enumerate(df.iterrows()):
        extracted_data.append({
            "Post Number": f"Post {position + 1}",
            "Date of Post": _cell(row, "Date", "N/A"),
            "Media Type": _cell(row, "Media Type", "N/A"),
            "Number of Pictures": _cell(row, "Number of Pictures", 0),
            "Number of Videos": _cell(row, "Number of Videos", 0),
            "Number of Audios": _cell(row, "Number of Audios", 0),
            "Likes": _cell(row, "Likes", 0),
            "Comments": _cell(row, "Comments", 0),
        })
    return extracted_data
+
144
# Merge metadata from Excel with the generated data
def merge_metadata_with_generated_data(generated_data, excel_metadata):
    """Fold per-post Excel metadata into *generated_data* in place.

    Each metadata record's "Post Number" field selects the target entry:
    an unseen post number is inserted as a brand-new entry, while an
    existing entry is updated (metadata keys win on collision). The
    mutated *generated_data* mapping is also returned for convenience.
    """
    for record in excel_metadata:
        key = record["Post Number"]
        if key not in generated_data:
            generated_data[key] = record
        else:
            generated_data[key].update(record)
    return generated_data
+
154
# Function to create DOCX from extracted data
def create_docx_from_data(extracted_data):
    """Render *extracted_data* ({post heading: {field: value}}) as a Document.

    Each post becomes a level-1 heading followed by one "field: value"
    paragraph per entry, with a newline paragraph separating posts.
    Returns the assembled python-docx Document (caller saves it).
    """
    document = Document()
    for heading, fields in extracted_data.items():
        document.add_heading(heading, level=1)
        for field_name, field_value in fields.items():
            document.add_paragraph(f"{field_name}: {field_value}")
        document.add_paragraph("\n")  # Add a line break between posts
    return document
163
 
164
  # Streamlit app
 
215
  if output_data:
216
  st.write(output_data)
217
 
218
+ # Create DOCX file for download
219
+ if output_data:
220
+ doc = create_docx_from_data(output_data)
221
+ docx_io = io.BytesIO()
222
+ doc.save(docx_io)
223
+ docx_io.seek(0)
224
 
225
  st.download_button(
226
+ label="Download Extracted Data as DOCX",
227
+ data=docx_io,
228
  file_name="extracted_data.docx",
229
  mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
230
  )