"""Streamlit chatbot that answers questions about a bundled course PDF.

The PDF text is embedded with Google Generative AI embeddings, indexed with
FAISS, and queried through a Gemini chat model.
"""
import streamlit as st
from PyPDF2 import PdfReader
from io import BytesIO
import os
import tempfile
import shutil
# Fixed imports for LangChain
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
# API key is read from the process environment (the original author notes this
# is populated from Hugging Face Secrets); empty string when it is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "")

# The FAISS index is persisted under the system temporary directory.
TEMP_DIR = tempfile.gettempdir()
FAISS_INDEX_PATH = os.path.join(TEMP_DIR, "faiss_index")

# Course PDF, given relative to the current working directory.
PDF_FILE_PATH = "./slide.pdf"

# Embedding model identifier (author's note: current GA model; legacy models
# have been shut down).
EMBEDDING_MODEL = "models/gemini-embedding-001"
def get_pdf_text_from_file(pdf_path):
    """Extract the text of every page of the PDF at ``pdf_path``.

    Pages for which ``extract_text()`` returns nothing are skipped. The
    remaining page texts are joined with a newline so that the last word of
    one page is not fused with the first word of the next.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The combined page text; an empty string when no page yields text.

    Raises:
        FileNotFoundError: If ``pdf_path`` does not exist.
    """
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"PDF file not found: {pdf_path}")

    pdf_reader = PdfReader(pdf_path)
    page_texts = (page.extract_text() for page in pdf_reader.pages)
    # str.join avoids repeated string concatenation inside the loop.
    return "\n".join(page_text for page_text in page_texts if page_text)
def get_text_chunks(text, *, chunk_size=10000, chunk_overlap=1000):
    """Split ``text`` into overlapping chunks with a recursive character splitter.

    Args:
        text: The full document text to split.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
        chunk_overlap: Overlap between consecutive chunks, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.

    Returns:
        A list of chunk strings; an empty list when ``text`` is empty.
    """
    if not text:
        # Nothing to split, so there is no need to build a splitter.
        return []

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
def get_vector_store(text_chunks, api_key):
    """Embed ``text_chunks`` and persist a fresh FAISS index at ``FAISS_INDEX_PATH``.

    Args:
        text_chunks: Chunk strings to embed and index.
        api_key: Google API key passed to the embedding client.

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    if not text_chunks:
        # Fail with a clear message instead of an obscure downstream error.
        raise ValueError("No text chunks to index.")

    embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=api_key)
    # Build the new index in memory before touching the disk, so a failed
    # embedding call does not destroy an index that was already saved.
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)

    # Clear any stale index (dimension mismatch if switching from old model)
    if os.path.exists(FAISS_INDEX_PATH):
        shutil.rmtree(FAISS_INDEX_PATH)
    vector_store.save_local(FAISS_INDEX_PATH)
def get_response(user_question, api_key, chat_history):
    """Answer ``user_question`` from the saved FAISS index, with chat context.

    Loads the index from ``FAISS_INDEX_PATH``, retrieves the 4 chunks most
    similar to the question, builds a prompt from those chunks plus the most
    recent chat messages, and sends it to the Gemini chat model.

    Args:
        user_question: The text the user just submitted.
        api_key: Google API key for the embedding and chat clients.
        chat_history: Sequence of ``{"role": ..., "content": ...}`` dicts.
            A trailing user entry equal to ``user_question`` is ignored, so
            callers may pass the history with the question already appended.

    Returns:
        The ``content`` attribute of the model response.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=api_key)
    # NOTE(security): this flag opts in to deserializing the saved index
    # (pickle-based per the LangChain docs). The index is expected to be the
    # one written by get_vector_store, but it lives in the system temp
    # directory -- confirm nothing untrusted can write to that location.
    new_db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question, k=4)

    # Callers append the current question to the history before calling. Drop
    # that trailing entry so it is not shown both under "Chat History" and as
    # "Current Question", and does not use up a slot of the history window.
    prior_messages = list(chat_history)
    if prior_messages:
        latest = prior_messages[-1]
        if latest["role"] == "user" and latest["content"] == user_question:
            prior_messages.pop()

    # Format chat history for context (last 10 messages only).
    history_lines = []
    for msg in prior_messages[-10:]:
        role = "User" if msg["role"] == "user" else "Assistant"
        history_lines.append(f"{role}: {msg['content']}\n")
    history_text = "".join(history_lines)

    # Combine document contents
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt_template = f"""
You are a helpful assistant for Antimicrobial Pharmacology. You answer questions based ONLY on the context provided from the PDF documents.
IMPORTANT RULES:
1. Do not use any external knowledge or assumptions.
2. If the answer is not found in the context, reply with "I don't know based on the provided materials."
3. Be conversational and helpful.
4. When generating MCQs, create questions that test understanding of the material.
5. When checking answers, be encouraging and provide explanations.
6. IMPORTANT: When referencing information from the course materials, always say "your professor says" or "according to your professor" instead of "the text states", "the document states", "the text says", or similar phrases. This makes the learning experience more personal and connected to the course.
Chat History:
{history_text}
Context from PDF:
{context}
Current Question:
{user_question}
Instructions:
- If the user asks for a multiple choice question (MCQ), quiz, or test question:
* Generate a question with 4 options (A, B, C, D) based ONLY on the context
* Make sure the question tests important concepts from the material
* Do NOT reveal the correct answer yet
* Ask the user to select their answer
- If the user provides an answer (like "A", "B", "C", "D" or the full answer text) AND there was a recent MCQ in the chat history:
* Check if the answer is correct based on the context
* If correct: Congratulate them and explain why it's correct, referencing what "your professor says" in the materials
* If incorrect: Encourage them, reveal the correct answer, and explain why using "your professor says" to reference the source material
- For regular questions: Answer based on the context provided, using "your professor says" when citing information
Remember: Never use phrases like "the text states", "the document says", "according to the text", etc. Always use "your professor says" or "according to your professor" instead.
Answer:
"""
    model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3, google_api_key=api_key)
    response = model.invoke(prompt_template)
    return response.content
def _request_reply(question, api_key):
    """Ask the model about ``question`` and return ``(text, succeeded)``.

    On failure the text is the user-facing error message and ``succeeded`` is
    False. Streamlit's rerun/stop control-flow exceptions are re-raised.
    """
    try:
        return get_response(question, api_key, st.session_state["messages"]), True
    except (st.runtime.scriptrunner.RerunException, st.runtime.scriptrunner.StopException):
        # Control-flow signals must reach Streamlit untouched.
        raise
    except Exception as e:
        return f"Error getting answer: {str(e)}", False


def _run_quick_action(question, api_key, spinner_text):
    """Record a canned question and its reply in the chat history, then rerun."""
    st.session_state["messages"].append({"role": "user", "content": question})
    with st.spinner(spinner_text):
        reply, _ = _request_reply(question, api_key)
    st.session_state["messages"].append({"role": "assistant", "content": reply})
    st.rerun()


def main():
    """Render the Streamlit app: API-key gate, PDF indexing, then the chat UI.

    State kept in ``st.session_state``:
        api_entered: True once an API key is available.
        user_api_key: The key in use (from the environment or typed in).
        pdf_processed: True once the PDF has been indexed.
        messages: Chat history as ``{"role", "content"}`` dicts.
    """
    st.set_page_config(
        page_title="Antimicrobial Pharmacology Chatbot",
        page_icon="💊",
        layout="wide",
    )
    st.header("Antimicrobial Pharmacology Chatbot (RX24)")
    st.markdown("---")

    # Initialize session state
    if "api_entered" not in st.session_state:
        st.session_state["api_entered"] = False
    if "pdf_processed" not in st.session_state:
        st.session_state["pdf_processed"] = False
    if "messages" not in st.session_state:
        st.session_state["messages"] = []

    # Check for API key
    api_key = GOOGLE_API_KEY

    # STEP 1: API Key handling
    if not st.session_state["api_entered"]:
        if not api_key:
            st.warning("Google API Key not found in environment variables.")
            st.info("Please add GOOGLE_API_KEY to your Hugging Face Space secrets or enter it below.")
            user_api_key = st.text_input(
                "Enter your Gemini API key",
                type="password",
                help="Get your API key from https://makersuite.google.com/app/apikey",
            )
            if st.button("Continue", type="primary") and user_api_key:
                st.session_state["user_api_key"] = user_api_key
                st.session_state["api_entered"] = True
                st.rerun()
            # No key yet: halt this run so nothing below renders.
            st.stop()
        else:
            st.session_state["user_api_key"] = api_key
            st.session_state["api_entered"] = True
    api_key = st.session_state.get("user_api_key", "")

    # STEP 2: Auto-process PDF file
    if not st.session_state["pdf_processed"]:
        st.subheader("Processing Antimicrobial Pharmacology Course")
        with st.spinner("Processing Antimicrobial Pharmacology Course... This may take a moment."):
            try:
                raw_text = get_pdf_text_from_file(PDF_FILE_PATH)
                if not raw_text.strip():
                    st.error("No text could be extracted from the PDF. Please check your file.")
                    st.stop()
                st.info(f"Processing: {PDF_FILE_PATH}")
                text_chunks = get_text_chunks(raw_text)
                get_vector_store(text_chunks, api_key)
                st.session_state["pdf_processed"] = True
                st.success("PDF processed successfully. You can now ask questions.")
                st.rerun()
            except (st.runtime.scriptrunner.RerunException, st.runtime.scriptrunner.StopException):
                # st.stop()/st.rerun() above signal via exceptions; let them through.
                raise
            except FileNotFoundError as e:
                st.error(str(e))
                st.stop()
            except Exception as e:
                st.error(f"Error processing PDF: {str(e)}")
                st.stop()

    # STEP 3: Chat Interface
    # Sidebar with options
    with st.sidebar:
        st.subheader("Options")
        st.info("Loaded: Antimicrobial Pharmacology Course")
        if st.button("Reprocess PDF"):
            st.session_state["pdf_processed"] = False
            st.rerun()
        if st.button("Clear Chat History"):
            st.session_state["messages"] = []
            st.rerun()
        st.markdown("---")
        st.subheader("Quick Actions")
        st.markdown(
            "\n"
            "Try asking:\n"
            '- "Give me a multiple choice question"\n'
            '- "Quiz me on antibiotics"\n'
            '- "Generate an MCQ about [topic]"\n'
            '- "What are the main topics?"\n'
        )
        st.markdown("---")
        st.subheader("How to use MCQs")
        st.markdown(
            "\n"
            '1. Ask for an MCQ (e.g., "Give me a quiz question")\n'
            "2. The bot will generate a question with options A-D\n"
            '3. Reply with your answer (e.g., "A" or "The answer is B")\n'
            "4. Get feedback on whether you're correct\n"
        )

    # Main chat area
    st.subheader("Chat with your Pharmacology Assistant")

    # Display chat messages
    for message in st.session_state["messages"]:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Quick action buttons (only show if no messages yet)
    if not st.session_state["messages"]:
        st.markdown("### Quick Start")
        # (button label, question sent to the model, spinner text)
        quick_actions = (
            (
                "Generate MCQ",
                "Give me a multiple choice question to test my knowledge",
                "Generating question...",
            ),
            (
                "Summarize Topics",
                "What are the main topics covered in this material?",
                "Analyzing...",
            ),
            (
                "How can you help?",
                "What can you help me with regarding this pharmacology material?",
                "Processing...",
            ),
        )
        for column, (label, quick_question, spinner_text) in zip(st.columns(3), quick_actions):
            with column:
                if st.button(label, use_container_width=True):
                    _run_quick_action(quick_question, api_key, spinner_text)

    # Chat input
    if user_question := st.chat_input("Ask a question or answer an MCQ..."):
        # Add user message to chat history
        st.session_state["messages"].append({"role": "user", "content": user_question})
        # Display user message
        with st.chat_message("user"):
            st.markdown(user_question)
        # Generate and display assistant response
        with st.chat_message("assistant"):
            with st.spinner("Processing..."):
                reply, succeeded = _request_reply(user_question, api_key)
                if succeeded:
                    st.markdown(reply)
                else:
                    st.error(reply)
                st.session_state["messages"].append({"role": "assistant", "content": reply})

    # Add footer
    st.markdown("---")
    st.markdown(
        "\n"
        "<div style='text-align: center'>\n"
        "<small>Antimicrobial Pharmacology Chatbot - Powered by Gemini AI</small>\n"
        "</div>\n",
        unsafe_allow_html=True,
    )
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()