NavyDevilDoc committed on
Commit
c23afd4
·
verified ·
1 Parent(s): ffc0162

Update src/app.py

Browse files

updated flattening functionality

Files changed (1) hide show
  1. src/app.py +84 -6
src/app.py CHANGED
@@ -43,6 +43,47 @@ if "active_index" not in st.session_state: st.session_state.active_index = None
43
  if "last_prompt_sent" not in st.session_state: st.session_state.last_prompt_sent = ""
44
  if "last_context_used" not in st.session_state: st.session_state.last_context_used = ""
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # --- HELPER FUNCTIONS ---
47
  def query_model_universal(messages, max_tokens, model_choice, user_key=None):
48
  """Unified router for Chat, Tools, and Quiz."""
@@ -338,9 +379,12 @@ with tab2:
338
  with c2:
339
  use_vision = st.toggle("πŸ‘οΈ Enable Vision Mode")
340
  if use_vision and "GPT-4o" not in opts: st.warning("Vision requires OpenAI.")
 
341
  if uploaded_file:
342
  temp_path = rag_engine.save_uploaded_file(uploaded_file, st.session_state.username)
343
  col_a, col_b, col_c = st.columns(3)
 
 
344
  with col_a:
345
  chunk_strategy = st.selectbox("Chunking Strategy", ["paragraph", "token"])
346
  if st.button("πŸ“₯ Add to KB", type="primary"):
@@ -350,6 +394,8 @@ with tab2:
350
  ok, msg = rag_engine.ingest_file(temp_path, st.session_state.username, st.session_state.active_index, st.session_state.active_embed_model, chunk_strategy)
351
  if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
352
  else: st.error(msg)
 
 
353
  with col_b:
354
  st.write(""); st.write("")
355
  if st.button("πŸ“ Summarize"):
@@ -364,22 +410,53 @@ with tab2:
364
  msgs = [{"role":"user", "content": prompt}]
365
  summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
366
  st.subheader("Summary"); st.markdown(summ)
 
 
367
  with col_c:
368
  st.write(""); st.write("")
369
  if "flattened_result" not in st.session_state: st.session_state.flattened_result = None
 
370
  if st.button("πŸ“„ Flatten"):
371
  with st.spinner("Flattening..."):
372
  key = st.session_state.get("user_openai_key") or OPENAI_KEY
 
 
373
  with open(temp_path, "rb") as f:
374
  class Wrapper:
375
  def __init__(self, data, n): self.data=data; self.name=n
376
  def read(self): return self.data
377
  raw = doc_loader.extract_text_from_file(Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key)
378
- # Flattener Logic simplified for view
379
- proc = admin_panel.OutlineProcessor(raw) if hasattr(admin_panel, 'OutlineProcessor') else None # Note: You had OutlineProcessor in main, keep it if needed or move to logic
380
- # Assuming logic is same as before, keeping brevity:
381
- st.warning("Flattening logic requires the class definition above, ensure it is preserved.")
382
- # Re-inserting the OutlineProcessor class at top of file for safety
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  st.divider()
384
  st.subheader("Database Management")
385
  c1, c2 = st.columns([2, 1])
@@ -392,6 +469,7 @@ with tab2:
392
  ok, msg = rag_engine.rebuild_cache_from_pinecone(st.session_state.username, st.session_state.active_index)
393
  if ok: st.success(msg); time.sleep(1); st.rerun()
394
  else: st.error(msg)
 
395
  docs = rag_engine.list_documents(st.session_state.username)
396
  if docs:
397
  for d in docs:
@@ -403,7 +481,7 @@ with tab2:
403
  rag_engine.delete_document(st.session_state.username, d['source'], st.session_state.active_index)
404
  tracker.upload_user_db(st.session_state.username); st.rerun()
405
  else: st.warning("Cache Empty.")
406
-
407
  # === TAB 3: QUIZ MODE ===
408
  with tab3:
409
  st.header("βš“ Qualification Board Simulator")
 
43
  if "last_prompt_sent" not in st.session_state: st.session_state.last_prompt_sent = ""
44
  if "last_context_used" not in st.session_state: st.session_state.last_context_used = ""
45
 
46
# --- FLATTENER LOGIC ---
class OutlineProcessor:
    """Parses indented text outlines for the Flattener tool.

    The input text is split into lines, wrapped continuation lines are
    glued back onto the list item they belong to, and the indentation is
    walked with a stack to emit one flat record per item:
    ``{"context": <ancestor chain joined by " > ", or "ROOT">,
       "target": <the item's own text>}``.
    """

    # A numbered ("1."), lettered ("a."), or bulleted ("-" / "*") list
    # marker at the start of a line, followed by at least one space.
    _MARKER = r"^\s*(\d+\.|[a-zA-Z]\.|-|\*)\s+"

    def __init__(self, file_content):
        self.raw_lines = file_content.split('\n')

    def _is_list_item(self, line):
        """Return True if *line* begins with a recognized list marker."""
        return re.match(self._MARKER, line) is not None

    def _merge_multiline_items(self):
        """Collapse soft-wrapped lines.

        Blank lines are dropped; any non-blank line that does not start a
        new list item is appended (single-space separated) onto the most
        recently kept line. The first kept line is always kept verbatim.
        """
        kept = []
        for raw in self.raw_lines:
            text = raw.strip()
            if not text:
                continue
            if kept and not self._is_list_item(raw):
                # Continuation of the previous item's text.
                kept[-1] = kept[-1].rstrip() + " " + text
            else:
                kept.append(raw)
        return kept

    def parse(self):
        """Flatten the outline into context/target records.

        Leading-whitespace width is the nesting depth; a stack of open
        ancestor items supplies each record's context string.
        """
        ancestors = []   # stack of {'indent': int, 'text': str}
        flattened = []
        for item in self._merge_multiline_items():
            text = item.strip()
            depth = len(item) - len(item.lstrip())
            # Pop levels at the same or deeper indentation: they are
            # siblings or closed branches, not parents of this item.
            while ancestors and ancestors[-1]['indent'] >= depth:
                ancestors.pop()
            ancestors.append({'indent': depth, 'text': text})
            parents = [entry['text'] for entry in ancestors[:-1]]
            context = " > ".join(parents) if parents else "ROOT"
            flattened.append({"context": context, "target": text})
        return flattened
87
  # --- HELPER FUNCTIONS ---
88
  def query_model_universal(messages, max_tokens, model_choice, user_key=None):
89
  """Unified router for Chat, Tools, and Quiz."""
 
379
  with c2:
380
  use_vision = st.toggle("πŸ‘οΈ Enable Vision Mode")
381
  if use_vision and "GPT-4o" not in opts: st.warning("Vision requires OpenAI.")
382
+
383
  if uploaded_file:
384
  temp_path = rag_engine.save_uploaded_file(uploaded_file, st.session_state.username)
385
  col_a, col_b, col_c = st.columns(3)
386
+
387
+ # COLUMN A: Ingest
388
  with col_a:
389
  chunk_strategy = st.selectbox("Chunking Strategy", ["paragraph", "token"])
390
  if st.button("πŸ“₯ Add to KB", type="primary"):
 
394
  ok, msg = rag_engine.ingest_file(temp_path, st.session_state.username, st.session_state.active_index, st.session_state.active_embed_model, chunk_strategy)
395
  if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
396
  else: st.error(msg)
397
+
398
+ # COLUMN B: Summarize
399
  with col_b:
400
  st.write(""); st.write("")
401
  if st.button("πŸ“ Summarize"):
 
410
  msgs = [{"role":"user", "content": prompt}]
411
  summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
412
  st.subheader("Summary"); st.markdown(summ)
413
+
414
+ # COLUMN C: Flatten (THE FIX)
415
  with col_c:
416
  st.write(""); st.write("")
417
  if "flattened_result" not in st.session_state: st.session_state.flattened_result = None
418
+
419
  if st.button("πŸ“„ Flatten"):
420
  with st.spinner("Flattening..."):
421
  key = st.session_state.get("user_openai_key") or OPENAI_KEY
422
+
423
+ # 1. Read File
424
  with open(temp_path, "rb") as f:
425
  class Wrapper:
426
  def __init__(self, data, n): self.data=data; self.name=n
427
  def read(self): return self.data
428
  raw = doc_loader.extract_text_from_file(Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key)
429
+
430
+ # 2. Parse Outline (This was missing logic previously)
431
+ proc = OutlineProcessor(raw)
432
+ items = proc.parse()
433
+
434
+ # 3. Process Items
435
+ out_txt = []
436
+ bar = st.progress(0)
437
+ for i, item in enumerate(items):
438
+ p = f"Context: {item['context']}\nTarget: {item['target']}\nRewrite as one sentence."
439
+ m = [{"role":"user", "content": p}]
440
+ res, _ = query_model_universal(m, 300, model_choice, st.session_state.get("user_openai_key"))
441
+ out_txt.append(res)
442
+ bar.progress((i+1)/len(items))
443
+
444
+ final_flattened_text = "\n".join(out_txt)
445
+ st.session_state.flattened_result = {"text": final_flattened_text, "source": f"{uploaded_file.name}_flat"}
446
+ st.rerun()
447
+
448
+ if st.session_state.flattened_result:
449
+ res = st.session_state.flattened_result
450
+ st.success("Complete!")
451
+ st.text_area("Result", res["text"], height=200)
452
+ if st.button("πŸ“₯ Index Flat"):
453
+ if not st.session_state.active_index: st.error("Select Index.")
454
+ else:
455
+ with st.spinner("Indexing..."):
456
+ ok, msg = rag_engine.process_and_add_text(res["text"], res["source"], st.session_state.username, st.session_state.active_index)
457
+ if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
458
+ else: st.error(msg)
459
+
460
  st.divider()
461
  st.subheader("Database Management")
462
  c1, c2 = st.columns([2, 1])
 
469
  ok, msg = rag_engine.rebuild_cache_from_pinecone(st.session_state.username, st.session_state.active_index)
470
  if ok: st.success(msg); time.sleep(1); st.rerun()
471
  else: st.error(msg)
472
+
473
  docs = rag_engine.list_documents(st.session_state.username)
474
  if docs:
475
  for d in docs:
 
481
  rag_engine.delete_document(st.session_state.username, d['source'], st.session_state.active_index)
482
  tracker.upload_user_db(st.session_state.username); st.rerun()
483
  else: st.warning("Cache Empty.")
484
+
485
  # === TAB 3: QUIZ MODE ===
486
  with tab3:
487
  st.header("βš“ Qualification Board Simulator")