AbhijitClemson committed on
Commit
ebc8022
·
verified ·
1 Parent(s): b2afd40

Update page_files/categorized/Backend/PDF_DataExtraction.py

Browse files
page_files/categorized/Backend/PDF_DataExtraction.py CHANGED
@@ -2018,6 +2018,18 @@ def run_pipeline(
2018
  doi_override: str = "",
2019
  progress_callback: Any = None,
2020
  ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[Chunk], List[str], Dict]:
 
 
 
 
 
 
 
 
 
 
 
 
2021
  global GEMINI_MODEL, GEMINI_API_URL
2022
  if not GEMINI_MODEL:
2023
  key = os.getenv("GEMINI_API_KEY", "")
@@ -2171,13 +2183,13 @@ def _run_streamlit():
2171
  with st.sidebar:
2172
  st.header("βš™οΈ Settings")
2173
  st.divider()
2174
- st.markdown(f"**Gemini model:** `{GEMINI_MODEL}`")
2175
- st.markdown(f"**GPT model:** `{GPT_MODEL}`")
2176
- st.markdown(f"**Embedder:** `{EMBED_MODEL_NAME}`")
2177
- st.markdown(f"**ChromaDB:** {'βœ…' if CHROMA_AVAILABLE else '❌ not installed'}")
2178
- st.markdown(f"**Docling:** {'βœ…' if DOCLING_AVAILABLE else '❌'}")
2179
- st.markdown(f"**Camelot:** {'βœ…' if CAMELOT_AVAILABLE else '❌'}")
2180
- st.markdown(f"**OCR:** {'βœ…' if OCR_AVAILABLE else '❌'}")
2181
  gemini_ok = bool(GEMINI_API_KEY)
2182
  gpt_ok = bool(OPENAI_API_KEY)
2183
  st.markdown(f"**Gemini API Key:** {'βœ…' if gemini_ok else '❌ GEMINI_API_KEY not set'}")
 
2018
  doi_override: str = "",
2019
  progress_callback: Any = None,
2020
  ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[Chunk], List[str], Dict]:
2021
+ """
2022
+ Full dual-LLM consensus pipeline.
2023
+
2024
+ Returns
2025
+ -------
2026
+ df_consensus : rows agreed by both LLMs ← primary output
2027
+ df_gemini : Gemini-only output
2028
+ df_gpt : GPT-only output
2029
+ all_chunks : all Chunk objects with scores
2030
+ api_errors : list of error strings
2031
+ meta : pipeline stats dict
2032
+ """
2033
  global GEMINI_MODEL, GEMINI_API_URL
2034
  if not GEMINI_MODEL:
2035
  key = os.getenv("GEMINI_API_KEY", "")
 
2183
  with st.sidebar:
2184
  st.header("βš™οΈ Settings")
2185
  st.divider()
2186
+ #st.markdown(f"**Gemini model:** `{GEMINI_MODEL}`")
2187
+ #st.markdown(f"**GPT model:** `{GPT_MODEL}`")
2188
+ #st.markdown(f"**Embedder:** `{EMBED_MODEL_NAME}`")
2189
+ #st.markdown(f"**ChromaDB:** {'βœ…' if CHROMA_AVAILABLE else '❌ not installed'}")
2190
+ #st.markdown(f"**Docling:** {'βœ…' if DOCLING_AVAILABLE else '❌'}")
2191
+ #st.markdown(f"**Camelot:** {'βœ…' if CAMELOT_AVAILABLE else '❌'}")
2192
+ #st.markdown(f"**OCR:** {'βœ…' if OCR_AVAILABLE else '❌'}")
2193
  gemini_ok = bool(GEMINI_API_KEY)
2194
  gpt_ok = bool(OPENAI_API_KEY)
2195
  st.markdown(f"**Gemini API Key:** {'βœ…' if gemini_ok else '❌ GEMINI_API_KEY not set'}")