# =================================================================================
# app.py: Main application file for the Streamlit web interface
# =================================================================================
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import snapshot_download
import os
# === Helpers for checking the LlamaIndex persist dir and downloading it from a HF dataset ===
# Hugging Face dataset repo that holds the persisted LlamaIndex store (overridable via env).
LLAMA_INDEX_DATASET_ID = os.getenv("HF_INDEX_DATASET_ID", "alperensn/llamaIndexVectorBase_fda")
LLAMA_INDEX_SUBDIR = os.getenv("HF_INDEX_SUBDIR", "").strip() # set this if the dataset keeps the index in a sub-folder
# Marker files identifying a complete persist dir; check both the legacy and the newer ("default__"-prefixed) naming schemes.
MARKERS_CLASSIC = {"index_store.json", "docstore.json", "graph_store.json", "default__vector_store.json", "image__vector_store.json" }
MARKERS_DEFAULT = {"default__index_store.json", "default__docstore.json", "default__vector_store.json", "default_image__vector_store.json","default__graph_store.json"}
def _persist_path(base_dir: str) -> str:
    """Return the directory that holds the persisted index.

    When LLAMA_INDEX_SUBDIR is configured, the index lives in that
    sub-folder of *base_dir*; otherwise *base_dir* itself is used.
    """
    if LLAMA_INDEX_SUBDIR:
        return os.path.join(base_dir, LLAMA_INDEX_SUBDIR)
    return base_dir
def llama_index_exists(base_dir: str) -> bool:
    """Check whether a complete persisted LlamaIndex store exists under *base_dir*."""
    persist_dir = _persist_path(base_dir)
    if not os.path.isdir(persist_dir):
        return False
    present = set(os.listdir(persist_dir))
    # A store counts as present if either naming scheme's file set is complete.
    return any(markers.issubset(present) for markers in (MARKERS_CLASSIC, MARKERS_DEFAULT))
def download_llama_index_if_needed(base_dir: str):
    """Fetch the persisted index from the HF dataset unless it is already on disk."""
    target_dir = _persist_path(base_dir)
    os.makedirs(target_dir, exist_ok=True)
    if llama_index_exists(base_dir):
        # Nothing to do — a complete persist dir is already present.
        return
    snapshot_download(
        repo_id=LLAMA_INDEX_DATASET_ID,
        repo_type="dataset",
        local_dir=target_dir,
        local_dir_use_symlinks=False,
    )
# In case the downloaded LlamaIndex persist folder ended up in a sub-directory:
def find_llama_index_dir(base_dir: str) -> str:
    """Return the first directory at or below *base_dir* that contains a full
    set of LlamaIndex persist files; fall back to *base_dir* itself."""
    marker_sets = (MARKERS_CLASSIC, MARKERS_DEFAULT)

    def _has_index(names) -> bool:
        # True when every marker file of at least one naming scheme is present.
        present = set(names)
        return any(ms.issubset(present) for ms in marker_sets)

    if os.path.isdir(base_dir) and _has_index(os.listdir(base_dir)):
        return base_dir
    for root, _dirs, filenames in os.walk(base_dir):
        if _has_index(filenames):
            return root
    return base_dir
# Load environment variables from .env file
load_dotenv()
# Import the modules we've created
import config
import rag_pipeline # Now using the LlamaIndex pipeline
# --- Page Configuration ---
# Must be the first Streamlit command executed and may only run once per page.
st.set_page_config(
    page_title="PharmaBot",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)
# --- State Management ---
def initialize_state():
    """Seed st.session_state with the keys the app relies on (idempotent)."""
    defaults = {
        "messages": [{"role": "assistant", "content": "Welcome to PharmaBot! How can I help you today?"}],
        "query_engine": None,
        "initialized": False,
    }
    # Only fill in keys that are missing so reruns keep existing state.
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value
# --- UI Components ---
def setup_sidebar():
    """Sets up the sidebar with app information, a medical disclaimer,
    and the technical configuration read from the config module."""
    with st.sidebar:
        st.header("About PharmaBot")
        st.info(
            "PharmaBot is an AI assistant designed to answer questions about "
            "pharmaceuticals based on a knowledge base of RAG documents. "
            "It uses a Retrieval-Augmented Generation (RAG) pipeline to provide accurate, "
            "context-aware answers."
        )
        st.warning("**Disclaimer: I am an AI assistant, not a medical professional. This information is for educational purposes only. Please consult with a qualified healthcare provider for any health concerns or before making any medical decisions.**"
        )
        st.markdown("---")
        st.header("Technical Details")
        # Values below are read from config at render time.
        st.markdown(
            f"""
            - **LLM Model:** `{config.LLM_MODEL_ID}`
            - **Embedding Model:** `{config.EMBEDDING_MODEL_NAME}`
            - **Vector Type:** `LLama Index Vector Store`
            - **Vector Store:** `{config.VECTOR_STORE_PATH}`
            """
        )
def display_chat_history():
    """Render every message stored in the session's chat history."""
    for entry in st.session_state.messages:
        with st.chat_message(entry["role"]):
            st.write(entry["content"])
def handle_user_input(chat_engine):
    """Read a prompt from the chat box, query the engine, and render both sides."""
    prompt = st.chat_input("Ask me anything about pharmaceuticals...")
    if not prompt:
        # No new input this rerun.
        return
    # Record and echo the user's message.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)
    # Generate, display, and record the assistant's reply.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = chat_engine.chat(prompt)
            answer = str(response)
            st.write(answer)
            st.session_state.messages.append({"role": "assistant", "content": answer})
import time
from build_knowledge_base import build_vector_store
import os
# --- Main Application Logic ---
def main():
    """Run the Streamlit app.

    Ensures the knowledge base exists (downloading the persisted index from
    the HF dataset, or building it locally as a fallback), initializes the
    RAG pipeline once per session, then serves the chat UI.
    """
    # BUG FIX: the original called st.set_page_config() again here. Streamlit
    # allows set_page_config() at most once per page and it must be the first
    # command; the module-level call at import time already handles it, so a
    # second call would raise StreamlitAPIException.
    initialize_state()
    st.title("💊 PharmaBot: Your AI Pharmaceutical Assistant")
    setup_sidebar()

    # Initialize the RAG pipeline if it hasn't been already
    if not st.session_state.initialized:
        # 1) First try to download the persisted index from the dataset into a local folder
        if not llama_index_exists(config.LLAMA_INDEX_STORE_PATH):
            with st.status("Knowledge base not found locally. Downloading from dataset...", expanded=True) as status:
                try:
                    status.write(f"Downloading persisted index from: {LLAMA_INDEX_DATASET_ID}")
                    download_llama_index_if_needed(config.LLAMA_INDEX_STORE_PATH)
                    # The dataset may nest the persist dir; point config at the real location.
                    detected_dir = find_llama_index_dir(config.LLAMA_INDEX_STORE_PATH)
                    if detected_dir != config.LLAMA_INDEX_STORE_PATH:
                        config.LLAMA_INDEX_STORE_PATH = detected_dir
                    status.update(label="Index downloaded from dataset.", state="complete", expanded=False)
                    time.sleep(1)
                except Exception as e:
                    # Download failed — fall back to building the index from scratch.
                    status.update(label="Dataset download failed. Falling back to local build...", state="running", expanded=True)
                    try:
                        status.write("This is a one-time setup and may take a few minutes...")
                        build_vector_store()
                        status.update(label="Knowledge base built successfully!", state="complete", expanded=False)
                        time.sleep(1)
                    except Exception as be:
                        # Both paths failed: surface both errors and halt the app.
                        status.update(label="Build Failed", state="error", expanded=True)
                        st.error(f"An error occurred while preparing the knowledge base:\n- dataset error: {e}\n- build error: {be}")
                        st.stop()

        # 2) RAG pipeline init
        with st.status("Initializing the RAG pipeline...", expanded=True) as status:
            try:
                status.write("Step 1/3: Initializing LLM and embedding models...")
                rag_pipeline.initialize_llm_and_embed_model()
                status.write("Step 2/3: Loading vector index from storage...")
                index = rag_pipeline.load_vector_index()  # no change needed if load_vector_index() already reads from config.LLAMA_INDEX_STORE_PATH
                status.write("Step 3/3: Building the conversational chat engine...")
                st.session_state.query_engine = rag_pipeline.build_query_engine(index)
                st.session_state.initialized = True
                status.update(label="Initialization Complete!", state="complete", expanded=False)
                time.sleep(1)
            except Exception as e:
                status.update(label="Initialization Failed", state="error")
                st.error(f"An unexpected error occurred during initialization: {e}")
                return
        st.rerun()

    # Display chat and handle input if initialized
    if st.session_state.initialized:
        display_chat_history()
        handle_user_input(st.session_state.query_engine)
# Script entry point.
if __name__ == "__main__":
    main()