ahm14 commited on
Commit
985e391
·
verified ·
1 Parent(s): 56ea0c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -223
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  import pandas as pd
4
  import streamlit as st
@@ -12,135 +11,55 @@ import io
12
  from langdetect import detect
13
  from collections import Counter
14
  from dotenv import load_dotenv
15
- from langchain_groq import ChatGroq
16
- from langchain_core.output_parsers import StrOutputParser
17
- from langchain_core.prompts import ChatPromptTemplate
18
- from transformers import pipeline
19
- from nltk.tokenize import sent_tokenize
20
- from rake_nltk import Rake
21
 
22
  # Load environment variables
23
  load_dotenv()
24
 
25
- # Check if Groq API key is available
26
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
27
- if not GROQ_API_KEY:
28
- logging.error("Missing Groq API key. Please set the GROQ_API_KEY environment variable.")
29
- st.error("API key is missing. Please provide a valid API key.")
30
-
31
  # Initialize logging
32
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
33
 
34
- # Initialize LLM (Groq API)
35
- llm = ChatGroq(temperature=0.5, groq_api_key=GROQ_API_KEY, model_name="llama3-8b-8192")
36
-
37
- # Download required NLTK resources
38
- nltk.download("punkt")
39
- nltk.download("punkt_tab")
40
- nltk.download("stopwords")
41
-
42
- # Tone categories for fallback method
43
- tone_categories = {
44
- "Emotional": ["urgent", "violence", "disappearances", "forced", "killing", "crisis", "concern"],
45
- "Harsh": ["corrupt", "oppression", "failure", "repression", "exploit", "unjust", "authoritarian"],
46
- "Somber": ["tragedy", "loss", "pain", "sorrow", "mourning", "grief", "devastation"],
47
- "Motivational": ["rise", "resist", "mobilize", "inspire", "courage", "change", "determination"],
48
- "Informative": ["announcement", "event", "scheduled", "update", "details", "protest", "statement"],
49
- "Positive": ["progress", "unity", "hope", "victory", "together", "solidarity", "uplifting"],
50
- "Angry": ["rage", "injustice", "fury", "resentment", "outrage", "betrayal"],
51
- "Fearful": ["threat", "danger", "terror", "panic", "risk", "warning"],
52
- "Sarcastic": ["brilliant", "great job", "amazing", "what a surprise", "well done", "as expected"],
53
- "Hopeful": ["optimism", "better future", "faith", "confidence", "looking forward"]
54
- }
55
-
56
- # Frame categories for fallback method
57
- frame_categories = {
58
- "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
59
- "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
60
- "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
61
- "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
62
- "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
63
- "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
64
- "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
65
- "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
66
- "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
67
- "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
68
- "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
69
- "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
70
- "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
71
- "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
72
- "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
73
- }
74
-
75
- def suggest_themes(keywords):
76
  """
77
- Suggest themes based on extracted keywords using a simple mapping.
78
- You can adjust the mapping dictionary as needed.
79
  """
80
- theme_mapping = {
81
- "violence": "Conflict",
82
- "crisis": "Conflict",
83
- "repression": "Oppression",
84
- "oppression": "Oppression",
85
- "freedom": "Empowerment",
86
- "hope": "Optimism",
87
- "unity": "Solidarity",
88
- "progress": "Advancement",
89
- "justice": "Social Justice",
90
- "rights": "Social Justice",
91
- "equality": "Equality",
92
- "exploitation": "Exploitation",
93
- "mobilize": "Mobilization",
94
- "protest": "Activism",
95
- "environment": "Environmental",
96
- "climate": "Environmental"
97
- }
98
- suggested = set()
99
- for kw in keywords:
100
- lower_kw = kw.lower()
101
- for key, theme in theme_mapping.items():
102
- if key in lower_kw:
103
- suggested.add(theme)
104
- return list(suggested)
105
 
106
- def suggest_frames(themes):
107
  """
108
- Suggest frames based on the suggested themes.
109
- Adjust this mapping to reflect the relationship between themes and your framing categories.
110
  """
111
- frame_mapping = {
112
- "Conflict": "Anti-Extremism & Anti-Violence",
113
- "Oppression": "Systemic Oppression",
114
- "Empowerment": "Empowerment & Resistance",
115
- "Optimism": "Hopeful",
116
- "Solidarity": "Positive",
117
- "Advancement": "Informative",
118
- "Social Justice": "Human Rights & Justice",
119
- "Equality": "Gender & Patriarchy",
120
- "Exploitation": "Political & State Accountability",
121
- "Mobilization": "Grassroots Mobilization",
122
- "Activism": "Activism & Advocacy",
123
- "Environmental": "Environmental Crisis & Activism"
124
- }
125
- suggested_frames = set()
126
- for theme in themes:
127
- for key, frame in frame_mapping.items():
128
- if key.lower() in theme.lower():
129
- suggested_frames.add(frame)
130
- return list(suggested_frames)
131
-
132
 
133
- def extract_keywords(text):
134
- # Initialize RAKE with default NLTK stopwords
135
- r = Rake()
136
- # Extract keywords from the text
137
- r.extract_keywords_from_text(text)
138
- # Get ranked phrases (highest ranking first)
139
- ranked_phrases = r.get_ranked_phrases()
140
- # Return only the top N keywords
141
- return ranked_phrases
142
 
143
- # Detect language
144
  def detect_language(text):
145
  try:
146
  return detect(text)
@@ -148,91 +67,9 @@ def detect_language(text):
148
  logging.error(f"Error detecting language: {e}")
149
  return "unknown"
150
 
151
- # Extract tone using Groq API (or fallback method)
152
- def extract_tone(text):
153
- try:
154
- response = llm.chat([{"role": "system", "content": "Analyze the tone of the following text and provide descriptive tone labels."},
155
- {"role": "user", "content": text}])
156
- return response["choices"][0]["message"]["content"].split(", ")
157
- except Exception as e:
158
- logging.error(f"Groq API error: {e}")
159
- return extract_tone_fallback(text)
160
-
161
- # Fallback method for tone extraction
162
- def extract_tone_fallback(text):
163
- detected_tones = set()
164
- text_lower = text.lower()
165
- for category, keywords in tone_categories.items():
166
- if any(word in text_lower for word in keywords):
167
- detected_tones.add(category)
168
- return list(detected_tones) if detected_tones else ["Neutral"]
169
-
170
- # Extract hashtags
171
  def extract_hashtags(text):
172
  return re.findall(r"#\w+", text)
173
 
174
- # -------------------------------------------------------------------
175
- # New functions for frame categorization and display
176
- # -------------------------------------------------------------------
177
-
178
- def get_frame_category_mapping(text):
179
- """
180
- Returns a mapping of every frame (from frame_categories) to one of the four categories.
181
- Detected frames are assigned a focus level based on keyword frequency:
182
- - Top detected: "Major Focus"
183
- - Next up to two: "Significant Focus"
184
- - Remaining detected: "Minor Mention"
185
- Frames not detected get "Not Applicable".
186
- """
187
- text_lower = text.lower()
188
- # Calculate frequency for each frame
189
- frame_freq = {}
190
- for frame, keywords in frame_categories.items():
191
- freq = sum(1 for word in keywords if word in text_lower)
192
- frame_freq[frame] = freq
193
-
194
- # Identify detected frames (frequency > 0) and sort descending
195
- detected = [(frame, freq) for frame, freq in frame_freq.items() if freq > 0]
196
- detected.sort(key=lambda x: x[1], reverse=True)
197
-
198
- category_mapping = {}
199
- if detected:
200
- # Highest frequency frame as Major Focus
201
- category_mapping[detected[0][0]] = "Major Focus"
202
- # Next up to two frames as Significant Focus
203
- for frame, _ in detected[1:3]:
204
- category_mapping[frame] = "Significant Focus"
205
- # Remaining detected frames as Minor Mention
206
- for frame, _ in detected[3:]:
207
- category_mapping[frame] = "Minor Mention"
208
- # For frames not detected, assign Not Applicable
209
- for frame in frame_categories.keys():
210
- if frame not in category_mapping:
211
- category_mapping[frame] = "Not Applicable"
212
- return category_mapping
213
-
214
- def format_frame_categories_table(category_mapping):
215
- """
216
- Returns a markdown-formatted table displaying each frame with columns:
217
- Major Focus, Significant Focus, Minor Mention, and Not Applicable.
218
- A tick (✓) marks the assigned category.
219
- """
220
- header = "| Frame | Major Focus | Significant Focus | Minor Mention | Not Applicable |\n"
221
- header += "| --- | --- | --- | --- | --- |\n"
222
- tick = "✓"
223
- rows = ""
224
- for frame, category in category_mapping.items():
225
- major = tick if category == "Major Focus" else ""
226
- significant = tick if category == "Significant Focus" else ""
227
- minor = tick if category == "Minor Mention" else ""
228
- not_applicable = tick if category == "Not Applicable" else ""
229
- rows += f"| {frame} | {major} | {significant} | {minor} | {not_applicable} |\n"
230
- return header + rows
231
-
232
- # -------------------------------------------------------------------
233
- # Existing functions for file processing
234
- # -------------------------------------------------------------------
235
-
236
  def extract_captions_from_docx(docx_file):
237
  doc = Document(docx_file)
238
  captions = {}
@@ -264,6 +101,59 @@ def merge_metadata_with_generated_data(generated_data, excel_metadata):
264
  generated_data[post_number] = post_data
265
  return generated_data
266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  def create_docx_from_data(extracted_data):
268
  doc = Document()
269
  for post_number, data in extracted_data.items():
@@ -271,18 +161,15 @@ def create_docx_from_data(extracted_data):
271
  ordered_keys = [
272
  "Post Number", "Date of Post", "Media Type", "Number of Pictures",
273
  "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
274
- "Full Caption", "Language", "Tone", "Hashtags", "Keywords" # Added "Keywords"
275
  ]
276
  for key in ordered_keys:
277
  value = data.get(key, "N/A")
278
  if key in ["Tone", "Hashtags", "Keywords"]:
279
- # For keywords, join the list to a comma-separated string
280
  value = ", ".join(value) if isinstance(value, list) else value
281
  para = doc.add_paragraph()
282
  run = para.add_run(f"**{key}:** {value}")
283
  run.font.size = Pt(11)
284
-
285
- # Existing code to add the Frames table (if present)
286
  if "FramesMapping" in data:
287
  doc.add_paragraph("Frames:")
288
  mapping = data["FramesMapping"]
@@ -305,15 +192,10 @@ def create_docx_from_data(extracted_data):
305
  else:
306
  value = data.get("Frames", "N/A")
307
  doc.add_paragraph(f"**Frames:** {value}")
308
-
309
- # --- New: Table for Keywords, Themes, and Frames ---
310
- # Assume that 'Keywords' is already extracted and stored in data.
311
  keywords = data.get("Keywords", [])
312
- # Generate suggested themes and frames from keywords
313
  themes = suggest_themes(keywords) if keywords else []
314
- frames_from_themes = suggest_frames(themes) if themes else []
315
-
316
- # Create a new table with 3 columns: Keywords, Themes, Frames
317
  doc.add_paragraph("Summary Table:")
318
  summary_table = doc.add_table(rows=1, cols=3)
319
  summary_table.style = "Light List Accent 1"
@@ -321,21 +203,16 @@ def create_docx_from_data(extracted_data):
321
  hdr_cells[0].text = "Keywords"
322
  hdr_cells[1].text = "Themes"
323
  hdr_cells[2].text = "Frames"
324
-
325
  row_cells = summary_table.add_row().cells
326
  row_cells[0].text = ", ".join(keywords) if keywords else "N/A"
327
  row_cells[1].text = ", ".join(themes) if themes else "N/A"
328
- row_cells[2].text = ", ".join(frames_from_themes) if frames_from_themes else "N/A"
329
-
 
330
  doc.add_paragraph("\n")
331
  return doc
332
 
333
-
334
-
335
- # -------------------------------------------------------------------
336
- # Streamlit App UI
337
- # -------------------------------------------------------------------
338
-
339
  st.title("AI-Powered Coding Sheet Generator")
340
  st.write("Enter text or upload a DOCX/Excel file for analysis:")
341
 
@@ -348,14 +225,18 @@ output_data = {}
348
  if input_text:
349
  frame_mapping = get_frame_category_mapping(input_text)
350
  frames_table = format_frame_categories_table(frame_mapping)
 
 
 
 
351
  output_data["Manual Input"] = {
352
  "Full Caption": input_text,
353
  "Language": detect_language(input_text),
354
- "Tone": extract_tone(input_text),
355
  "Hashtags": extract_hashtags(input_text),
356
  "Frames": frames_table,
357
  "FramesMapping": frame_mapping,
358
- "Keywords": extract_keywords(input_text)
359
  }
360
 
361
  if uploaded_docx:
@@ -363,14 +244,16 @@ if uploaded_docx:
363
  for caption, text in captions.items():
364
  frame_mapping = get_frame_category_mapping(text)
365
  frames_table = format_frame_categories_table(frame_mapping)
 
 
366
  output_data[caption] = {
367
  "Full Caption": text,
368
  "Language": detect_language(text),
369
- "Tone": extract_tone(text),
370
  "Hashtags": extract_hashtags(text),
371
  "Frames": frames_table,
372
  "FramesMapping": frame_mapping,
373
- "Keywords": extract_keywords(text)
374
  }
375
 
376
  if uploaded_excel:
@@ -391,4 +274,4 @@ if output_data:
391
  docx_io = io.BytesIO()
392
  docx_output.save(docx_io)
393
  docx_io.seek(0)
394
- st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="coding_sheet.docx")
 
 
1
  import os
2
  import pandas as pd
3
  import streamlit as st
 
11
  from langdetect import detect
12
  from collections import Counter
13
  from dotenv import load_dotenv
14
+ from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 
 
 
15
 
16
  # Load environment variables
17
  load_dotenv()
18
 
 
 
 
 
 
 
19
  # Initialize logging
20
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
21
 
22
# --- Initialize DeepSeek-V3-0324 locally ---
# NOTE(review): DeepSeek-V3-0324 is a very large model; loading it eagerly at
# module import time with a plain from_pretrained() call will likely exhaust
# memory on a typical Streamlit host — confirm the deployment target has the
# required resources, or consider a hosted inference endpoint instead.
# trust_remote_code=True executes model-repo code on load; only safe because
# the repo id below is pinned to the official deepseek-ai namespace.
MODEL_NAME = "deepseek-ai/DeepSeek-V3-0324"
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
26
+
27
def generate_response(prompt: str, max_length: int = 150, temperature: float = 0.5) -> str:
    """Run *prompt* through the module-level model/tokenizer and return the decoded text.

    Sampling is enabled (``do_sample=True``, ``top_p=0.95``), so the output is
    stochastic. ``max_length`` bounds the total generated sequence per the
    ``transformers`` ``generate`` API.
    """
    encoded = tokenizer.encode(prompt, return_tensors="pt")
    generated = model.generate(
        encoded,
        max_length=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=0.95,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.strip()
38
+
39
def extract_keywords(text: str) -> list:
    """Ask the local DeepSeek-V3-0324 model for keywords in *text*.

    The prompt requests a comma-separated list; the model's reply is split on
    commas and blank fragments are dropped.
    """
    query = (f"Extract the most important keywords from the following text. "
             f"Return them as a comma-separated list.\n\nText: \"{text}\"")
    raw = generate_response(query, max_length=100, temperature=0.5)
    extracted = []
    for piece in raw.split(","):
        piece = piece.strip()
        if piece:
            extracted.append(piece)
    return extracted
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
def suggest_themes(keywords: list) -> list:
    """Derive candidate themes from *keywords* via the local DeepSeek-V3-0324 model.

    Joins the keywords into a single prompt, then parses the model's
    comma-separated reply, discarding empty fragments.
    """
    joined = ", ".join(keywords)
    request = (f"Based on the following keywords: {joined}, "
               f"suggest a list of relevant themes. Return them as a comma-separated list.")
    reply = generate_response(request, max_length=100, temperature=0.5)
    return [fragment.strip() for fragment in reply.split(",") if fragment.strip()]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ # --- Retain or slightly adjust other helper functions ---
 
 
 
 
 
 
 
 
62
 
 
63
  def detect_language(text):
64
  try:
65
  return detect(text)
 
67
  logging.error(f"Error detecting language: {e}")
68
  return "unknown"
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
def extract_hashtags(text):
    """Return every hashtag token (``#`` followed by word characters) in *text*, in order."""
    return [match.group(0) for match in re.finditer(r"#\w+", text)]
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def extract_captions_from_docx(docx_file):
74
  doc = Document(docx_file)
75
  captions = {}
 
101
  generated_data[post_number] = post_data
102
  return generated_data
103
 
104
def format_frame_categories_table(category_mapping):
    """Render *category_mapping* as a markdown table.

    One row per frame, with a tick (✓) in the single column matching the
    frame's assigned focus level; the other three columns stay empty.
    """
    columns = ["Major Focus", "Significant Focus", "Minor Mention", "Not Applicable"]
    lines = [
        "| Frame | Major Focus | Significant Focus | Minor Mention | Not Applicable |",
        "| --- | --- | --- | --- | --- |",
    ]
    for frame, assigned in category_mapping.items():
        cells = ["✓" if assigned == column else "" for column in columns]
        lines.append("| " + frame + " | " + " | ".join(cells) + " |")
    return "\n".join(lines) + "\n"
116
+
117
def get_frame_category_mapping(text):
    """Assign every frame a focus level based on keyword hits in *text*.

    Frames are ranked by how many of their keywords appear (substring match,
    case-insensitive): the top frame gets "Major Focus", the next two
    "Significant Focus", any further hits "Minor Mention", and frames with no
    hits "Not Applicable".
    """
    frame_categories = {
        "Human Rights & Justice": ["rights", "law", "justice", "legal", "humanitarian"],
        "Political & State Accountability": ["government", "policy", "state", "corruption", "accountability"],
        "Gender & Patriarchy": ["gender", "women", "violence", "patriarchy", "equality"],
        "Religious Freedom & Persecution": ["religion", "persecution", "minorities", "intolerance", "faith"],
        "Grassroots Mobilization": ["activism", "community", "movement", "local", "mobilization"],
        "Environmental Crisis & Activism": ["climate", "deforestation", "water", "pollution", "sustainability"],
        "Anti-Extremism & Anti-Violence": ["extremism", "violence", "hate speech", "radicalism", "mob attack"],
        "Social Inequality & Economic Disparities": ["class privilege", "labor rights", "economic", "discrimination"],
        "Activism & Advocacy": ["justice", "rights", "demand", "protest", "march", "campaign", "freedom of speech"],
        "Systemic Oppression": ["discrimination", "oppression", "minorities", "marginalized", "exclusion"],
        "Intersectionality": ["intersecting", "women", "minorities", "struggles", "multiple oppression"],
        "Call to Action": ["join us", "sign petition", "take action", "mobilize", "support movement"],
        "Empowerment & Resistance": ["empower", "resist", "challenge", "fight for", "stand up"],
        "Climate Justice": ["environment", "climate change", "sustainability", "biodiversity", "pollution"],
        "Human Rights Advocacy": ["human rights", "violations", "honor killing", "workplace discrimination", "law reform"]
    }
    lowered = text.lower()
    # Count keyword hits per frame (substring containment, not word-boundary).
    counts = {
        frame: sum(1 for keyword in keywords if keyword in lowered)
        for frame, keywords in frame_categories.items()
    }
    # Frames with at least one hit, highest count first; Python's stable sort
    # keeps dict-declaration order among ties, matching the original ranking.
    ranked = sorted(
        (frame for frame, count in counts.items() if count > 0),
        key=lambda frame: counts[frame],
        reverse=True,
    )
    mapping = {}
    if ranked:
        mapping[ranked[0]] = "Major Focus"
        for frame in ranked[1:3]:
            mapping[frame] = "Significant Focus"
        for frame in ranked[3:]:
            mapping[frame] = "Minor Mention"
    # Everything not detected falls through to "Not Applicable".
    for frame in frame_categories:
        mapping.setdefault(frame, "Not Applicable")
    return mapping
156
+
157
  def create_docx_from_data(extracted_data):
158
  doc = Document()
159
  for post_number, data in extracted_data.items():
 
161
  ordered_keys = [
162
  "Post Number", "Date of Post", "Media Type", "Number of Pictures",
163
  "Number of Videos", "Number of Audios", "Likes", "Comments", "Tagged Audience",
164
+ "Full Caption", "Language", "Tone", "Hashtags", "Keywords"
165
  ]
166
  for key in ordered_keys:
167
  value = data.get(key, "N/A")
168
  if key in ["Tone", "Hashtags", "Keywords"]:
 
169
  value = ", ".join(value) if isinstance(value, list) else value
170
  para = doc.add_paragraph()
171
  run = para.add_run(f"**{key}:** {value}")
172
  run.font.size = Pt(11)
 
 
173
  if "FramesMapping" in data:
174
  doc.add_paragraph("Frames:")
175
  mapping = data["FramesMapping"]
 
192
  else:
193
  value = data.get("Frames", "N/A")
194
  doc.add_paragraph(f"**Frames:** {value}")
195
+ # --- New: Summary Table for Keywords, Themes, and Frames ---
 
 
196
  keywords = data.get("Keywords", [])
197
+ # Generate themes using DeepSeek-based function
198
  themes = suggest_themes(keywords) if keywords else []
 
 
 
199
  doc.add_paragraph("Summary Table:")
200
  summary_table = doc.add_table(rows=1, cols=3)
201
  summary_table.style = "Light List Accent 1"
 
203
  hdr_cells[0].text = "Keywords"
204
  hdr_cells[1].text = "Themes"
205
  hdr_cells[2].text = "Frames"
 
206
  row_cells = summary_table.add_row().cells
207
  row_cells[0].text = ", ".join(keywords) if keywords else "N/A"
208
  row_cells[1].text = ", ".join(themes) if themes else "N/A"
209
+ frames_from_mapping = data.get("FramesMapping", {})
210
+ frames_list = ", ".join([f"{frame} ({cat})" for frame, cat in frames_from_mapping.items()])
211
+ row_cells[2].text = frames_list if frames_list else "N/A"
212
  doc.add_paragraph("\n")
213
  return doc
214
 
215
+ # --- Streamlit App UI ---
 
 
 
 
 
216
  st.title("AI-Powered Coding Sheet Generator")
217
  st.write("Enter text or upload a DOCX/Excel file for analysis:")
218
 
 
225
  if input_text:
226
  frame_mapping = get_frame_category_mapping(input_text)
227
  frames_table = format_frame_categories_table(frame_mapping)
228
+ # Use the DeepSeek-based keyword extraction
229
+ keywords = extract_keywords(input_text)
230
+ # For demonstration, reusing the extract_keywords for Tone as well (consider creating a dedicated tone function)
231
+ tone = extract_keywords(input_text)
232
  output_data["Manual Input"] = {
233
  "Full Caption": input_text,
234
  "Language": detect_language(input_text),
235
+ "Tone": tone,
236
  "Hashtags": extract_hashtags(input_text),
237
  "Frames": frames_table,
238
  "FramesMapping": frame_mapping,
239
+ "Keywords": keywords
240
  }
241
 
242
  if uploaded_docx:
 
244
  for caption, text in captions.items():
245
  frame_mapping = get_frame_category_mapping(text)
246
  frames_table = format_frame_categories_table(frame_mapping)
247
+ keywords = extract_keywords(text)
248
+ tone = extract_keywords(text)
249
  output_data[caption] = {
250
  "Full Caption": text,
251
  "Language": detect_language(text),
252
+ "Tone": tone,
253
  "Hashtags": extract_hashtags(text),
254
  "Frames": frames_table,
255
  "FramesMapping": frame_mapping,
256
+ "Keywords": keywords
257
  }
258
 
259
  if uploaded_excel:
 
274
  docx_io = io.BytesIO()
275
  docx_output.save(docx_io)
276
  docx_io.seek(0)
277
+ st.download_button("Download Merged Analysis as DOCX", data=docx_io, file_name="coding_sheet.docx")