Update page_files/categorized/Backend/PDF_DataExtraction.py
Browse files
page_files/categorized/Backend/PDF_DataExtraction.py
CHANGED
|
@@ -2018,6 +2018,18 @@ def run_pipeline(
|
|
| 2018 |
doi_override: str = "",
|
| 2019 |
progress_callback: Any = None,
|
| 2020 |
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[Chunk], List[str], Dict]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2021 |
global GEMINI_MODEL, GEMINI_API_URL
|
| 2022 |
if not GEMINI_MODEL:
|
| 2023 |
key = os.getenv("GEMINI_API_KEY", "")
|
|
@@ -2171,13 +2183,13 @@ def _run_streamlit():
|
|
| 2171 |
with st.sidebar:
|
| 2172 |
st.header("⚙️ Settings")
|
| 2173 |
st.divider()
|
| 2174 |
-
st.markdown(f"**Gemini model:** `{GEMINI_MODEL}`")
|
| 2175 |
-
st.markdown(f"**GPT model:** `{GPT_MODEL}`")
|
| 2176 |
-
st.markdown(f"**Embedder:** `{EMBED_MODEL_NAME}`")
|
| 2177 |
-
st.markdown(f"**ChromaDB:** {'✅' if CHROMA_AVAILABLE else '❌ not installed'}")
|
| 2178 |
-
st.markdown(f"**Docling:** {'✅' if DOCLING_AVAILABLE else '❌'}")
|
| 2179 |
-
st.markdown(f"**Camelot:** {'✅' if CAMELOT_AVAILABLE else '❌'}")
|
| 2180 |
-
st.markdown(f"**OCR:** {'✅' if OCR_AVAILABLE else '❌'}")
|
| 2181 |
gemini_ok = bool(GEMINI_API_KEY)
|
| 2182 |
gpt_ok = bool(OPENAI_API_KEY)
|
| 2183 |
st.markdown(f"**Gemini API Key:** {'✅' if gemini_ok else '❌ GEMINI_API_KEY not set'}")
|
|
|
|
| 2018 |
doi_override: str = "",
|
| 2019 |
progress_callback: Any = None,
|
| 2020 |
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, List[Chunk], List[str], Dict]:
|
| 2021 |
+
"""
|
| 2022 |
+
Full dual-LLM consensus pipeline.
|
| 2023 |
+
|
| 2024 |
+
Returns
|
| 2025 |
+
-------
|
| 2026 |
+
df_consensus : rows agreed by both LLMs — primary output
|
| 2027 |
+
df_gemini : Gemini-only output
|
| 2028 |
+
df_gpt : GPT-only output
|
| 2029 |
+
all_chunks : all Chunk objects with scores
|
| 2030 |
+
api_errors : list of error strings
|
| 2031 |
+
meta : pipeline stats dict
|
| 2032 |
+
"""
|
| 2033 |
global GEMINI_MODEL, GEMINI_API_URL
|
| 2034 |
if not GEMINI_MODEL:
|
| 2035 |
key = os.getenv("GEMINI_API_KEY", "")
|
|
|
|
| 2183 |
with st.sidebar:
|
| 2184 |
st.header("⚙️ Settings")
|
| 2185 |
st.divider()
|
| 2186 |
+
#st.markdown(f"**Gemini model:** `{GEMINI_MODEL}`")
|
| 2187 |
+
#st.markdown(f"**GPT model:** `{GPT_MODEL}`")
|
| 2188 |
+
#st.markdown(f"**Embedder:** `{EMBED_MODEL_NAME}`")
|
| 2189 |
+
#st.markdown(f"**ChromaDB:** {'✅' if CHROMA_AVAILABLE else '❌ not installed'}")
|
| 2190 |
+
#st.markdown(f"**Docling:** {'✅' if DOCLING_AVAILABLE else '❌'}")
|
| 2191 |
+
#st.markdown(f"**Camelot:** {'✅' if CAMELOT_AVAILABLE else '❌'}")
|
| 2192 |
+
#st.markdown(f"**OCR:** {'✅' if OCR_AVAILABLE else '❌'}")
|
| 2193 |
gemini_ok = bool(GEMINI_API_KEY)
|
| 2194 |
gpt_ok = bool(OPENAI_API_KEY)
|
| 2195 |
st.markdown(f"**Gemini API Key:** {'✅' if gemini_ok else '❌ GEMINI_API_KEY not set'}")
|