Em4e committed on
Commit
fc54d8b
·
verified ·
1 Parent(s): 92ff4ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -33
app.py CHANGED
@@ -89,8 +89,10 @@ class WebpageContentProcessor:
89
  structured_chunks = []
90
  for i, node in enumerate(nodes):
91
  content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
 
 
92
  title_match = re.match(r"^(#+)\s*(.*)", content)
93
- title = title_match.group(2).strip() if title_match else (content.split('\n')[0][:70] + "...")
94
  structured_chunks.append({"id": i, "title": title, "content": content})
95
  return structured_chunks
96
 
@@ -159,6 +161,7 @@ class ChunkManager:
159
 
160
  def delete_chunk(self, chunk_id: int):
161
  self._chunks = [c for c in self._chunks if c["id"] != chunk_id]
 
162
  for i, chunk in enumerate(self._chunks):
163
  chunk['id'] = i
164
 
@@ -172,19 +175,26 @@ class ChunkManager:
172
  self.target_grade_max = grade_max
173
  self.target_min_chunk_words = min_words
174
  self.target_max_chunk_words = max_words
175
- self.set_chunks(self.get_chunks()) # Recalculate stats with new targets
 
176
 
 
177
  st.set_page_config(layout="wide", page_title="Webpage Content Editor")
178
 
179
- # Initialize session state variables
180
- if 'chunk_manager' not in st.session_state:
181
- st.session_state.chunk_manager = ChunkManager()
182
- if 'content_processor' not in st.session_state:
183
- st.session_state.content_processor = WebpageContentProcessor()
184
- if 'selected_chunk_id' not in st.session_state:
185
- st.session_state.selected_chunk_id = None
186
- if 'status_message' not in st.session_state:
187
- st.session_state.status_message = ""
 
 
 
 
 
188
 
189
  processor = st.session_state.content_processor
190
  manager = st.session_state.chunk_manager
@@ -198,29 +208,31 @@ st.info(
198
  icon="ℹ️"
199
  )
200
 
201
- url_input = st.text_input("Enter a webpage URL to begin", key="url_input")
202
 
203
  if st.button("Process URL", use_container_width=True):
204
- if url_input:
 
205
  with st.spinner("Fetching and processing content..."):
206
- markdown = processor.fetch_and_convert_to_markdown(url_input)
207
  if "Error" in markdown:
208
  st.session_state.status_message = markdown
209
  manager.set_chunks([])
 
210
  else:
211
  chunks = processor.parse_markdown_into_chunks(markdown)
212
  manager.set_chunks(chunks)
213
- st.session_state.status_message = f"Successfully processed {len(chunks)} chunks." if chunks else "Could not extract content chunks."
214
-
215
- if manager.get_chunks():
216
- st.session_state.selected_chunk_id = manager.get_chunks()[0]['id']
217
- else:
218
- st.session_state.selected_chunk_id = None
219
  st.rerun()
220
 
221
  if st.session_state.status_message:
222
  st.toast(st.session_state.status_message)
223
- st.session_state.status_message = "" # Clear message after showing
224
 
225
  tab1, tab2 = st.tabs(["Chunk Editor", "Settings & Overview"])
226
 
@@ -229,23 +241,28 @@ with tab1:
229
  if not chunks:
230
  st.write("Process a URL to start editing chunks.")
231
  else:
 
232
  # Ensure selected_chunk_id is valid
233
- if st.session_state.selected_chunk_id not in [c['id'] for c in chunks]:
234
- st.session_state.selected_chunk_id = chunks[0]['id'] if chunks else None
235
 
236
  if st.session_state.selected_chunk_id is not None:
237
  chunk_options = {c['id']: f"Chunk {c['id']}: {c['title']}" for c in chunks}
238
 
239
- # This selectbox now directly manages `selected_chunk_id` in the session state.
240
- # When a user makes a selection, Streamlit automatically updates the state and reruns the script.
241
- st.selectbox(
242
  "Select a chunk to edit",
243
- options=list(chunk_options.keys()),
244
  format_func=lambda x: chunk_options.get(x, "Invalid Chunk"),
245
- key="selected_chunk_id", # The key is now the session state variable itself
246
- index=list(chunk_options.keys()).index(st.session_state.selected_chunk_id)
247
  )
248
 
 
 
 
 
 
249
  selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
250
 
251
  if selected_chunk:
@@ -255,7 +272,7 @@ with tab1:
255
  "Chunk Content",
256
  value=selected_chunk['content'],
257
  height=300,
258
- key=f"editor_{selected_chunk['id']}" # Unique key forces re-render
259
  )
260
 
261
  col1, col2, _ = st.columns([1, 1, 4])
@@ -265,10 +282,8 @@ with tab1:
265
  st.rerun()
266
 
267
  if col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
268
- old_id = selected_chunk['id']
269
- manager.delete_chunk(old_id)
270
  st.session_state.status_message = "Chunk deleted!"
271
- # Select the next available chunk or none if empty
272
  remaining_chunks = manager.get_chunks()
273
  st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
274
  st.rerun()
 
89
  structured_chunks = []
90
  for i, node in enumerate(nodes):
91
  content = node.get_content(metadata_mode=MetadataMode.NONE).strip()
92
+ if not content:
93
+ continue
94
  title_match = re.match(r"^(#+)\s*(.*)", content)
95
+ title = title_match.group(2).strip() if title_match and title_match.group(2).strip() else (content.split('\n')[0][:70] + "...")
96
  structured_chunks.append({"id": i, "title": title, "content": content})
97
  return structured_chunks
98
 
 
161
 
162
  def delete_chunk(self, chunk_id: int):
163
  self._chunks = [c for c in self._chunks if c["id"] != chunk_id]
164
+ # Re-index remaining chunks to maintain sequential IDs
165
  for i, chunk in enumerate(self._chunks):
166
  chunk['id'] = i
167
 
 
175
  self.target_grade_max = grade_max
176
  self.target_min_chunk_words = min_words
177
  self.target_max_chunk_words = max_words
178
+ # Recalculate stats for all chunks with new targets
179
+ self.set_chunks(self.get_chunks())
180
 
181
+ # --- Streamlit UI ---
182
  st.set_page_config(layout="wide", page_title="Webpage Content Editor")
183
 
184
+ # Initialize session state variables if they don't exist
185
+ def init_session_state():
186
+ if 'chunk_manager' not in st.session_state:
187
+ st.session_state.chunk_manager = ChunkManager()
188
+ if 'content_processor' not in st.session_state:
189
+ st.session_state.content_processor = WebpageContentProcessor()
190
+ if 'selected_chunk_id' not in st.session_state:
191
+ st.session_state.selected_chunk_id = None
192
+ if 'status_message' not in st.session_state:
193
+ st.session_state.status_message = ""
194
+ if 'url_input' not in st.session_state:
195
+ st.session_state.url_input = ""
196
+
197
+ init_session_state()
198
 
199
  processor = st.session_state.content_processor
200
  manager = st.session_state.chunk_manager
 
208
  icon="ℹ️"
209
  )
210
 
211
+ url_input = st.text_input("Enter a webpage URL to begin", value=st.session_state.url_input, key="url_input_widget")
212
 
213
  if st.button("Process URL", use_container_width=True):
214
+ st.session_state.url_input = st.session_state.url_input_widget
215
+ if st.session_state.url_input:
216
  with st.spinner("Fetching and processing content..."):
217
+ markdown = processor.fetch_and_convert_to_markdown(st.session_state.url_input)
218
  if "Error" in markdown:
219
  st.session_state.status_message = markdown
220
  manager.set_chunks([])
221
+ st.session_state.selected_chunk_id = None
222
  else:
223
  chunks = processor.parse_markdown_into_chunks(markdown)
224
  manager.set_chunks(chunks)
225
+ if chunks:
226
+ st.session_state.status_message = f"Successfully processed {len(chunks)} chunks."
227
+ st.session_state.selected_chunk_id = chunks[0]['id']
228
+ else:
229
+ st.session_state.status_message = "Could not extract content chunks."
230
+ st.session_state.selected_chunk_id = None
231
  st.rerun()
232
 
233
  if st.session_state.status_message:
234
  st.toast(st.session_state.status_message)
235
+ st.session_state.status_message = ""
236
 
237
  tab1, tab2 = st.tabs(["Chunk Editor", "Settings & Overview"])
238
 
 
241
  if not chunks:
242
  st.write("Process a URL to start editing chunks.")
243
  else:
244
+ chunk_ids = [c['id'] for c in chunks]
245
  # Ensure selected_chunk_id is valid
246
+ if st.session_state.selected_chunk_id not in chunk_ids:
247
+ st.session_state.selected_chunk_id = chunk_ids[0] if chunk_ids else None
248
 
249
  if st.session_state.selected_chunk_id is not None:
250
  chunk_options = {c['id']: f"Chunk {c['id']}: {c['title']}" for c in chunks}
251
 
252
+ # The selectbox is seeded from st.session_state.selected_chunk_id through `index`; it has no `key`,
253
+ # so Streamlit does not write the user's choice back to session state by itself. The check below the widget does that explicitly and reruns the script.
254
+ selected_id = st.selectbox(
255
  "Select a chunk to edit",
256
+ options=chunk_ids,
257
  format_func=lambda x: chunk_options.get(x, "Invalid Chunk"),
258
+ index=chunk_ids.index(st.session_state.selected_chunk_id)
 
259
  )
260
 
261
+ # Update the session state ONLY if the user selection has changed
262
+ if selected_id != st.session_state.selected_chunk_id:
263
+ st.session_state.selected_chunk_id = selected_id
264
+ st.rerun()
265
+
266
  selected_chunk = manager.get_chunk_by_id(st.session_state.selected_chunk_id)
267
 
268
  if selected_chunk:
 
272
  "Chunk Content",
273
  value=selected_chunk['content'],
274
  height=300,
275
+ key=f"editor_{selected_chunk['id']}" # Unique key forces widget to re-render on selection change
276
  )
277
 
278
  col1, col2, _ = st.columns([1, 1, 4])
 
282
  st.rerun()
283
 
284
  if col2.button("Delete Chunk", use_container_width=True, key=f"delete_{selected_chunk['id']}"):
285
+ manager.delete_chunk(selected_chunk['id'])
 
286
  st.session_state.status_message = "Chunk deleted!"
 
287
  remaining_chunks = manager.get_chunks()
288
  st.session_state.selected_chunk_id = remaining_chunks[0]['id'] if remaining_chunks else None
289
  st.rerun()