Spaces:

NavyDevilDoc
/

AI_Toolkit

Sleeping

App Files Community

NavyDevilDoc committed on Jan 18

Commit

c23afd4

verified ·

1 Parent(s): ffc0162

Update src/app.py

Browse files

updated flattening functionality

Files changed (1) hide show

src/app.py +84 -6

src/app.py CHANGED Viewed

@@ -43,6 +43,47 @@ if "active_index" not in st.session_state: st.session_state.active_index = None
 if "last_prompt_sent" not in st.session_state: st.session_state.last_prompt_sent = ""
 if "last_context_used" not in st.session_state: st.session_state.last_context_used = ""
 # --- HELPER FUNCTIONS ---
 def query_model_universal(messages, max_tokens, model_choice, user_key=None):
     """Unified router for Chat, Tools, and Quiz."""
@@ -338,9 +379,12 @@ with tab2:
     with c2:
         use_vision = st.toggle("👁️ Enable Vision Mode")
         if use_vision and "GPT-4o" not in opts: st.warning("Vision requires OpenAI.")
     if uploaded_file:
         temp_path = rag_engine.save_uploaded_file(uploaded_file, st.session_state.username)
         col_a, col_b, col_c = st.columns(3)
         with col_a:
             chunk_strategy = st.selectbox("Chunking Strategy", ["paragraph", "token"])
             if st.button("📥 Add to KB", type="primary"):
@@ -350,6 +394,8 @@ with tab2:
                         ok, msg = rag_engine.ingest_file(temp_path, st.session_state.username, st.session_state.active_index, st.session_state.active_embed_model, chunk_strategy)
                         if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
                         else: st.error(msg)
         with col_b:
             st.write(""); st.write("")
             if st.button("📝 Summarize"):
@@ -364,22 +410,53 @@ with tab2:
                     msgs = [{"role":"user", "content": prompt}]
                     summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
                     st.subheader("Summary"); st.markdown(summ)
         with col_c:
             st.write(""); st.write("")
             if "flattened_result" not in st.session_state: st.session_state.flattened_result = None
             if st.button("📄 Flatten"):
                 with st.spinner("Flattening..."):
                     key = st.session_state.get("user_openai_key") or OPENAI_KEY
                     with open(temp_path, "rb") as f:
                         class Wrapper:
                             def __init__(self, data, n): self.data=data; self.name=n
                             def read(self): return self.data
                         raw = doc_loader.extract_text_from_file(Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key)
-                    # Flattener Logic simplified for view
-                    proc = admin_panel.OutlineProcessor(raw) if hasattr(admin_panel, 'OutlineProcessor') else None # Note: You had OutlineProcessor in main, keep it if needed or move to logic
-                    # Assuming logic is same as before, keeping brevity:
-                    st.warning("Flattening logic requires the class definition above, ensure it is preserved.")
-                    # Re-inserting the OutlineProcessor class at top of file for safety
     st.divider()
     st.subheader("Database Management")
     c1, c2 = st.columns([2, 1])
@@ -392,6 +469,7 @@ with tab2:
                     ok, msg = rag_engine.rebuild_cache_from_pinecone(st.session_state.username, st.session_state.active_index)
                     if ok: st.success(msg); time.sleep(1); st.rerun()
                     else: st.error(msg)
     docs = rag_engine.list_documents(st.session_state.username)
     if docs:
         for d in docs:
@@ -403,7 +481,7 @@ with tab2:
                     rag_engine.delete_document(st.session_state.username, d['source'], st.session_state.active_index)
                     tracker.upload_user_db(st.session_state.username); st.rerun()
     else: st.warning("Cache Empty.")
 # === TAB 3: QUIZ MODE ===
 with tab3:
     st.header("⚓ Qualification Board Simulator")

 if "last_prompt_sent" not in st.session_state: st.session_state.last_prompt_sent = ""
 if "last_context_used" not in st.session_state: st.session_state.last_context_used = ""
+# --- FLATTENER LOGIC ---
+class OutlineProcessor:  # Turns an indented text outline into (ancestor context, item text) records.
+    """Parses text outlines for the Flattener tool."""
+    def __init__(self, file_content):  # file_content: the outline as one string (must support .split)
+        self.raw_lines = file_content.split('\n')  # split on '\n' only; blank lines are dropped later during merging
+    def _is_list_item(self, line):  # True when the line begins with an outline marker
+        pattern = r"^\s*(\d+\.|[a-zA-Z]\.|-|\*)\s+"  # optional indent, then "<digits>.", "<single letter>.", "-" or "*", then whitespace
+        return bool(re.match(pattern, line))  # NOTE(review): relies on `re` being imported at module level; the import is not visible in this diff
+    def _merge_multiline_items(self):  # Returns the non-blank lines with wrapped continuation lines folded into the entry above
+        merged_lines = []
+        for line in self.raw_lines:
+            stripped = line.strip()
+            if not stripped: continue  # skip blank / whitespace-only lines
+            if not merged_lines:  # the first non-blank line is always kept, list marker or not
+                merged_lines.append(line)
+                continue
+            if not self._is_list_item(line):  # no marker: treat as a continuation of the previous entry
+                merged_lines[-1] = merged_lines[-1].rstrip() + " " + stripped  # join with one space; the previous entry keeps its leading indent
+            else:
+                merged_lines.append(line)  # stored unmodified so parse() can still measure its indentation
+        return merged_lines
+    def parse(self):  # Returns a list of {"context": ..., "target": ...} dicts, one per merged line, in input order
+        clean_lines = self._merge_multiline_items()
+        stack = []  # chain of entries from the outermost ancestor down to the current line
+        results = []
+        for line in clean_lines:
+            stripped = line.strip()
+            indent = len(line) - len(line.lstrip())  # count of leading whitespace characters (a tab counts as one)
+            while stack and stack[-1]['indent'] >= indent:  # drop siblings and deeper entries so only strictly shallower ancestors remain
+                stack.pop()
+            stack.append({'indent': indent, 'text': stripped})
+            if len(stack) > 1:
+                context_str = " > ".join([item['text'] for item in stack[:-1]])  # ancestors' text, outermost first, excluding the current line
+            else:
+                context_str = "ROOT"  # no shallower entry above this line
+            results.append({"context": context_str, "target": stripped})
+        return results
 # --- HELPER FUNCTIONS ---
 def query_model_universal(messages, max_tokens, model_choice, user_key=None):
     """Unified router for Chat, Tools, and Quiz."""
     with c2:
         use_vision = st.toggle("👁️ Enable Vision Mode")
         if use_vision and "GPT-4o" not in opts: st.warning("Vision requires OpenAI.")
     if uploaded_file:
         temp_path = rag_engine.save_uploaded_file(uploaded_file, st.session_state.username)
         col_a, col_b, col_c = st.columns(3)
+        # COLUMN A: Ingest
         with col_a:
             chunk_strategy = st.selectbox("Chunking Strategy", ["paragraph", "token"])
             if st.button("📥 Add to KB", type="primary"):
                         ok, msg = rag_engine.ingest_file(temp_path, st.session_state.username, st.session_state.active_index, st.session_state.active_embed_model, chunk_strategy)
                         if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
                         else: st.error(msg)
+        # COLUMN B: Summarize
         with col_b:
             st.write(""); st.write("")
             if st.button("📝 Summarize"):
                     msgs = [{"role":"user", "content": prompt}]
                     summ, usage = query_model_universal(msgs, 1000, model_choice, st.session_state.get("user_openai_key"))
                     st.subheader("Summary"); st.markdown(summ)
+        # COLUMN C: Flatten (THE FIX)
         with col_c:
             st.write(""); st.write("")
             if "flattened_result" not in st.session_state: st.session_state.flattened_result = None
             if st.button("📄 Flatten"):
                 with st.spinner("Flattening..."):
                     key = st.session_state.get("user_openai_key") or OPENAI_KEY
+                    # 1. Read File
                     with open(temp_path, "rb") as f:
                         class Wrapper:
                             def __init__(self, data, n): self.data=data; self.name=n
                             def read(self): return self.data
                         raw = doc_loader.extract_text_from_file(Wrapper(f.read(), uploaded_file.name), use_vision=use_vision, api_key=key)
+                    # 2. Parse Outline (This was missing logic previously)
+                    proc = OutlineProcessor(raw)
+                    items = proc.parse()
+                    # 3. Process Items
+                    out_txt = []
+                    bar = st.progress(0)
+                    for i, item in enumerate(items):
+                        p = f"Context: {item['context']}\nTarget: {item['target']}\nRewrite as one sentence."
+                        m = [{"role":"user", "content": p}]
+                        res, _ = query_model_universal(m, 300, model_choice, st.session_state.get("user_openai_key"))
+                        out_txt.append(res)
+                        bar.progress((i+1)/len(items))
+                    final_flattened_text = "\n".join(out_txt)
+                    st.session_state.flattened_result = {"text": final_flattened_text, "source": f"{uploaded_file.name}_flat"}
+                    st.rerun()
+            if st.session_state.flattened_result:
+                res = st.session_state.flattened_result
+                st.success("Complete!")
+                st.text_area("Result", res["text"], height=200)
+                if st.button("📥 Index Flat"):
+                    if not st.session_state.active_index: st.error("Select Index.")
+                    else:
+                        with st.spinner("Indexing..."):
+                            ok, msg = rag_engine.process_and_add_text(res["text"], res["source"], st.session_state.username, st.session_state.active_index)
+                            if ok: tracker.upload_user_db(st.session_state.username); st.success(msg)
+                            else: st.error(msg)
     st.divider()
     st.subheader("Database Management")
     c1, c2 = st.columns([2, 1])
                     ok, msg = rag_engine.rebuild_cache_from_pinecone(st.session_state.username, st.session_state.active_index)
                     if ok: st.success(msg); time.sleep(1); st.rerun()
                     else: st.error(msg)
     docs = rag_engine.list_documents(st.session_state.username)
     if docs:
         for d in docs:
                     rag_engine.delete_document(st.session_state.username, d['source'], st.session_state.active_index)
                     tracker.upload_user_db(st.session_state.username); st.rerun()
     else: st.warning("Cache Empty.")
 # === TAB 3: QUIZ MODE ===
 with tab3:
     st.header("⚓ Qualification Board Simulator")