Spaces:
Sleeping
Sleeping
Update src/app.py
Browse files
updated flattening functionality
- src/app.py +84 -6
src/app.py
CHANGED
|
@@ -43,6 +43,47 @@ if "active_index" not in st.session_state: st.session_state.active_index = None
|
|
| 43 |
if "last_prompt_sent" not in st.session_state: st.session_state.last_prompt_sent = ""
|
| 44 |
if "last_context_used" not in st.session_state: st.session_state.last_context_used = ""
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
# --- HELPER FUNCTIONS ---
|
| 47 |
def query_model_universal(messages, max_tokens, model_choice, user_key=None):
|
| 48 |
"""Unified router for Chat, Tools, and Quiz."""
|
|
@@ -338,9 +379,12 @@ with tab2:
|
|
| 338 |
with c2:
|
| 339 |
use_vision = st.toggle("ποΈ Enable Vision Mode")
|
| 340 |
if use_vision and "GPT-4o" not in opts: st.warning("Vision requires OpenAI.")
|
|
|
|
| 341 |
if uploaded_file:
|
| 342 |
temp_path = rag_engine.save_uploaded_file(uploaded_file, st.session_state.username)
|
| 343 |
col_a, col_b, col_c = st.columns(3)
|
|
|
|
|
|
|
| 344 |
with col_a:
|
| 345 |
chunk_strategy = st.selectbox("Chunking Strategy", ["paragraph", "token"])
|
| 346 |
if st.button("π₯ Add to KB", type="primary"):
|
|
@@ -350,6 +394,8 @@ with tab2:
|
|
| 350 |
ok, msg = rag_engine.ingest_file(temp_path, st.session_state.username, st.session_state.active_index, st.session_state.active_embed_model, chunk_strategy)
|
| 351 |
if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
|
| 352 |
else: st.error(msg)
|
|
|
|
|
|
|
| 353 |
with col_b:
|
| 354 |
st.write(""); st.write("")
|
| 355 |
if st.button("π Summarize"):
|
|
@@ -364,22 +410,53 @@ with tab2:
|
|
| 364 |
msgs = [{"role":"user", "content": prompt}]
|
| 365 |
summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
|
| 366 |
st.subheader("Summary"); st.markdown(summ)
|
|
|
|
|
|
|
| 367 |
with col_c:
|
| 368 |
st.write(""); st.write("")
|
| 369 |
if "flattened_result" not in st.session_state: st.session_state.flattened_result = None
|
|
|
|
| 370 |
if st.button("π Flatten"):
|
| 371 |
with st.spinner("Flattening..."):
|
| 372 |
key = st.session_state.get("user_openai_key") or OPENAI_KEY
|
|
|
|
|
|
|
| 373 |
with open(temp_path, "rb") as f:
|
| 374 |
class Wrapper:
|
| 375 |
def __init__(self, data, n): self.data=data; self.name=n
|
| 376 |
def read(self): return self.data
|
| 377 |
raw = doc_loader.extract_text_from_file(Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key)
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
st.divider()
|
| 384 |
st.subheader("Database Management")
|
| 385 |
c1, c2 = st.columns([2, 1])
|
|
@@ -392,6 +469,7 @@ with tab2:
|
|
| 392 |
ok, msg = rag_engine.rebuild_cache_from_pinecone(st.session_state.username, st.session_state.active_index)
|
| 393 |
if ok: st.success(msg); time.sleep(1); st.rerun()
|
| 394 |
else: st.error(msg)
|
|
|
|
| 395 |
docs = rag_engine.list_documents(st.session_state.username)
|
| 396 |
if docs:
|
| 397 |
for d in docs:
|
|
@@ -403,7 +481,7 @@ with tab2:
|
|
| 403 |
rag_engine.delete_document(st.session_state.username, d['source'], st.session_state.active_index)
|
| 404 |
tracker.upload_user_db(st.session_state.username); st.rerun()
|
| 405 |
else: st.warning("Cache Empty.")
|
| 406 |
-
|
| 407 |
# === TAB 3: QUIZ MODE ===
|
| 408 |
with tab3:
|
| 409 |
st.header("β Qualification Board Simulator")
|
|
|
|
| 43 |
if "last_prompt_sent" not in st.session_state: st.session_state.last_prompt_sent = ""
|
| 44 |
if "last_context_used" not in st.session_state: st.session_state.last_context_used = ""
|
| 45 |
|
| 46 |
+
# --- FLATTENER LOGIC ---
|
| 47 |
+
class OutlineProcessor:
    """Parses text outlines for the Flattener tool.

    The input text is split into lines, wrapped continuation lines are
    folded back into the list item they belong to, and every resulting
    item is paired with the chain of its ancestors (derived from leading
    whitespace) so it can be rewritten on its own without losing context.
    """

    # A list marker ("1.", "a.", "-" or "*") after optional indentation,
    # followed by at least one whitespace character. Compiled once here
    # instead of being rebuilt on every _is_list_item() call.
    _LIST_ITEM_RE = re.compile(r"^\s*(\d+\.|[a-zA-Z]\.|-|\*)\s+")

    def __init__(self, file_content):
        """Store the outline text as a list of raw lines.

        Args:
            file_content: The outline text. ``None`` or an empty string is
                treated as an empty outline so that ``parse()`` returns
                ``[]`` instead of raising ``AttributeError``.
        """
        self.raw_lines = (file_content or "").split('\n')

    def _is_list_item(self, line: str) -> bool:
        """Return True if *line* starts with a recognised list marker."""
        return bool(self._LIST_ITEM_RE.match(line))

    def _merge_multiline_items(self):
        """Fold continuation lines into the preceding item.

        Blank lines are dropped. The first non-blank line is always kept
        as-is (it may be a title without a marker). Any later line that
        does not start with a list marker is appended, space-separated,
        to the previous kept line.

        Returns:
            A list of lines, each still carrying its original leading
            indentation.
        """
        merged_lines = []
        for line in self.raw_lines:
            stripped = line.strip()
            if not stripped:
                continue
            if merged_lines and not self._is_list_item(line):
                # Continuation of the previous item: join on a single space.
                merged_lines[-1] = merged_lines[-1].rstrip() + " " + stripped
            else:
                merged_lines.append(line)
        return merged_lines

    def parse(self) -> list:
        """Flatten the outline into context/target pairs.

        Returns:
            A list of dicts, one per outline item, in document order:
            ``{"context": <ancestors joined by " > " or "ROOT">,
            "target": <stripped item text>}``.
        """
        results = []
        # Ancestors of the line being processed, outermost first.
        stack = []
        for line in self._merge_multiline_items():
            stripped = line.strip()
            # Depth is measured in leading whitespace characters.
            indent = len(line) - len(line.lstrip())
            # Drop siblings and deeper items; what remains are true ancestors.
            while stack and stack[-1]['indent'] >= indent:
                stack.pop()
            if stack:
                context_str = " > ".join(item['text'] for item in stack)
            else:
                context_str = "ROOT"
            results.append({"context": context_str, "target": stripped})
            stack.append({'indent': indent, 'text': stripped})
        return results
|
| 86 |
+
|
| 87 |
# --- HELPER FUNCTIONS ---
|
| 88 |
def query_model_universal(messages, max_tokens, model_choice, user_key=None):
|
| 89 |
"""Unified router for Chat, Tools, and Quiz."""
|
|
|
|
| 379 |
with c2:
|
| 380 |
use_vision = st.toggle("ποΈ Enable Vision Mode")
|
| 381 |
if use_vision and "GPT-4o" not in opts: st.warning("Vision requires OpenAI.")
|
| 382 |
+
|
| 383 |
if uploaded_file:
|
| 384 |
temp_path = rag_engine.save_uploaded_file(uploaded_file, st.session_state.username)
|
| 385 |
col_a, col_b, col_c = st.columns(3)
|
| 386 |
+
|
| 387 |
+
# COLUMN A: Ingest
|
| 388 |
with col_a:
|
| 389 |
chunk_strategy = st.selectbox("Chunking Strategy", ["paragraph", "token"])
|
| 390 |
if st.button("π₯ Add to KB", type="primary"):
|
|
|
|
| 394 |
ok, msg = rag_engine.ingest_file(temp_path, st.session_state.username, st.session_state.active_index, st.session_state.active_embed_model, chunk_strategy)
|
| 395 |
if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
|
| 396 |
else: st.error(msg)
|
| 397 |
+
|
| 398 |
+
# COLUMN B: Summarize
|
| 399 |
with col_b:
|
| 400 |
st.write(""); st.write("")
|
| 401 |
if st.button("π Summarize"):
|
|
|
|
| 410 |
msgs = [{"role":"user", "content": prompt}]
|
| 411 |
summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
|
| 412 |
st.subheader("Summary"); st.markdown(summ)
|
| 413 |
+
|
| 414 |
+
# COLUMN C: Flatten (THE FIX)
|
| 415 |
with col_c:
|
| 416 |
st.write(""); st.write("")
|
| 417 |
if "flattened_result" not in st.session_state: st.session_state.flattened_result = None
|
| 418 |
+
|
| 419 |
if st.button("π Flatten"):
|
| 420 |
with st.spinner("Flattening..."):
|
| 421 |
key = st.session_state.get("user_openai_key") or OPENAI_KEY
|
| 422 |
+
|
| 423 |
+
# 1. Read File
|
| 424 |
with open(temp_path, "rb") as f:
|
| 425 |
class Wrapper:
|
| 426 |
def __init__(self, data, n): self.data=data; self.name=n
|
| 427 |
def read(self): return self.data
|
| 428 |
raw = doc_loader.extract_text_from_file(Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key)
|
| 429 |
+
|
| 430 |
+
# 2. Parse Outline (This was missing logic previously)
|
| 431 |
+
proc = OutlineProcessor(raw)
|
| 432 |
+
items = proc.parse()
|
| 433 |
+
|
| 434 |
+
# 3. Process Items
|
| 435 |
+
out_txt = []
|
| 436 |
+
bar = st.progress(0)
|
| 437 |
+
for i, item in enumerate(items):
|
| 438 |
+
p = f"Context: {item['context']}\nTarget: {item['target']}\nRewrite as one sentence."
|
| 439 |
+
m = [{"role":"user", "content": p}]
|
| 440 |
+
res, _ = query_model_universal(m, 300, model_choice, st.session_state.get("user_openai_key"))
|
| 441 |
+
out_txt.append(res)
|
| 442 |
+
bar.progress((i+1)/len(items))
|
| 443 |
+
|
| 444 |
+
final_flattened_text = "\n".join(out_txt)
|
| 445 |
+
st.session_state.flattened_result = {"text": final_flattened_text, "source": f"{uploaded_file.name}_flat"}
|
| 446 |
+
st.rerun()
|
| 447 |
+
|
| 448 |
+
if st.session_state.flattened_result:
|
| 449 |
+
res = st.session_state.flattened_result
|
| 450 |
+
st.success("Complete!")
|
| 451 |
+
st.text_area("Result", res["text"], height=200)
|
| 452 |
+
if st.button("π₯ Index Flat"):
|
| 453 |
+
if not st.session_state.active_index: st.error("Select Index.")
|
| 454 |
+
else:
|
| 455 |
+
with st.spinner("Indexing..."):
|
| 456 |
+
ok, msg = rag_engine.process_and_add_text(res["text"], res["source"], st.session_state.username, st.session_state.active_index)
|
| 457 |
+
if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
|
| 458 |
+
else: st.error(msg)
|
| 459 |
+
|
| 460 |
st.divider()
|
| 461 |
st.subheader("Database Management")
|
| 462 |
c1, c2 = st.columns([2, 1])
|
|
|
|
| 469 |
ok, msg = rag_engine.rebuild_cache_from_pinecone(st.session_state.username, st.session_state.active_index)
|
| 470 |
if ok: st.success(msg); time.sleep(1); st.rerun()
|
| 471 |
else: st.error(msg)
|
| 472 |
+
|
| 473 |
docs = rag_engine.list_documents(st.session_state.username)
|
| 474 |
if docs:
|
| 475 |
for d in docs:
|
|
|
|
| 481 |
rag_engine.delete_document(st.session_state.username, d['source'], st.session_state.active_index)
|
| 482 |
tracker.upload_user_db(st.session_state.username); st.rerun()
|
| 483 |
else: st.warning("Cache Empty.")
|
| 484 |
+
|
| 485 |
# === TAB 3: QUIZ MODE ===
|
| 486 |
with tab3:
|
| 487 |
st.header("β Qualification Board Simulator")
|