shan gao committed on
Commit
d2aac9d
·
1 Parent(s): a90d714
Files changed (1) hide show
  1. src/streamlit_app.py +8 -18
src/streamlit_app.py CHANGED
@@ -20,7 +20,7 @@ from typing import Dict, List, Optional, Tuple
20
 
21
  import requests
22
  import streamlit as st
23
- from llama_index.core import Settings, SimpleDirectoryReader, SummaryIndex, VectorStoreIndex
24
  from llama_index.core.agent.workflow import FunctionAgent
25
  from llama_index.core.node_parser import SentenceSplitter
26
  from llama_index.core.objects import ObjectIndex
@@ -48,21 +48,13 @@ DOC_SOURCES: Dict[str, Tuple[str, str, str]] = {
48
  ),
49
  }
50
  BASE_SYSTEM_PROMPT = (
51
- "You are an agent designed to answer queries over a set of RYBREVANT documents. "
52
- "Always use the tools provided to answer a question. Do not rely on prior knowledge or any information "
53
- "outside the provided documents. "
54
- "When responding, keep answers concise, always mention the source with exact document + page "
55
- "(for example, 'PI p.12' or 'brochure p.5'), and end with a brief safety disclaimer "
56
- "('Not medical advice; consult your healthcare professional'). "
57
- "If a question cannot be fully answered from the documents, clearly say so and recommend consulting a "
58
- "healthcare professional rather than guessing or extrapolating. "
59
- "For detailed, safety-critical questions such as dosing, contraindications, warnings, or administration "
60
- "details, always prefer the vector tools over summary tools so that answers are grounded in the exact text."
61
  )
62
 
63
- # Global embedding model used across all indices
64
- Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
65
-
66
 
67
  def _ensure_data_files() -> None:
68
  """Download source PDFs if they are missing."""
@@ -112,7 +104,8 @@ def _build_tools_for_doc(file_path: Path, name: str):
112
  if not nodes:
113
  raise ValueError(f"No text nodes parsed from {file_path}. Check parser credentials or PDF availability.")
114
 
115
- vector_index = VectorStoreIndex(nodes)
 
116
 
117
  def vector_query(query: str, page_numbers: Optional[List[int]] = None) -> str:
118
  """Grounded Q&A with optional page filters + citations.
@@ -222,9 +215,6 @@ def main() -> None:
222
  "- Data/parsing cached in this Space runtime"
223
  )
224
  st.divider()
225
- if st.button("Clear conversation", type="secondary"):
226
- st.session_state.pop("messages", None)
227
- st.experimental_rerun()
228
  st.markdown("Need to deploy? Push this app to Hugging Face Spaces with your API keys as secrets.")
229
 
230
  has_keys = _require_env("OPENAI_API_KEY") and _require_env("LLAMA_CLOUD_API_KEY")
 
20
 
21
  import requests
22
  import streamlit as st
23
+ from llama_index.core import SimpleDirectoryReader, SummaryIndex, VectorStoreIndex
24
  from llama_index.core.agent.workflow import FunctionAgent
25
  from llama_index.core.node_parser import SentenceSplitter
26
  from llama_index.core.objects import ObjectIndex
 
48
  ),
49
  }
50
  BASE_SYSTEM_PROMPT = (
51
+ "You are an agent designed to answer queries over a set of RYBREVANT documents." \
52
+ "Please always use the tools provided to answer a question. Do not rely on prior knowledge." \
53
+ "When responding, keep answers concise, always mention the source: exact document + page "
54
+ "(e.g., 'PI p.12' or 'brochure p.5'), and end with a brief safety disclaimer "
55
+ "('Not medical advice; consult your healthcare professional')."
 
 
 
 
 
56
  )
57
 
 
 
 
58
 
59
  def _ensure_data_files() -> None:
60
  """Download source PDFs if they are missing."""
 
104
  if not nodes:
105
  raise ValueError(f"No text nodes parsed from {file_path}. Check parser credentials or PDF availability.")
106
 
107
+ embed_model = OpenAIEmbedding(model="text-embedding-3-large")
108
+ vector_index = VectorStoreIndex(nodes, embed_model=embed_model)
109
 
110
  def vector_query(query: str, page_numbers: Optional[List[int]] = None) -> str:
111
  """Grounded Q&A with optional page filters + citations.
 
215
  "- Data/parsing cached in this Space runtime"
216
  )
217
  st.divider()
 
 
 
218
  st.markdown("Need to deploy? Push this app to Hugging Face Spaces with your API keys as secrets.")
219
 
220
  has_keys = _require_env("OPENAI_API_KEY") and _require_env("LLAMA_CLOUD_API_KEY")