File size: 12,180 Bytes
1f9fe23
 
c8e2f30
1f9fe23
87ff579
9848a2d
1f9fe23
87ff579
 
1f9fe23
 
 
 
87ff579
1f9fe23
 
87ff579
 
 
 
515d9a6
 
24b052a
e1eb3b2
9848a2d
 
99e1a1c
515d9a6
 
1f9fe23
515d9a6
 
24b052a
515d9a6
 
 
 
 
 
1f9fe23
99e1a1c
1f9fe23
 
 
 
99e1a1c
1f9fe23
9848a2d
 
 
 
 
1f9fe23
87ff579
1f9fe23
99e1a1c
3eb90d8
 
9848a2d
3eb90d8
 
 
 
 
 
 
 
 
 
 
 
 
99e1a1c
 
 
 
 
 
 
 
c185c93
99e1a1c
 
3eb90d8
99e1a1c
 
1f9fe23
99e1a1c
 
3eb90d8
99e1a1c
 
 
 
 
 
 
 
 
 
c185c93
 
99e1a1c
c185c93
 
 
99e1a1c
1f9fe23
 
99e1a1c
9848a2d
3eb90d8
 
99e1a1c
1f9fe23
 
99e1a1c
 
274de8d
99e1a1c
 
 
6cc741e
1f9fe23
 
87ff579
1f9fe23
 
 
 
99e1a1c
 
1f9fe23
87ff579
1f9fe23
 
87ff579
1f9fe23
 
99e1a1c
87ff579
99e1a1c
 
 
 
 
87ff579
1f9fe23
 
274de8d
1f9fe23
 
 
 
 
 
 
515d9a6
1f9fe23
ace8959
87ff579
ace8959
24b052a
515d9a6
24b052a
99e1a1c
24b052a
 
515d9a6
24b052a
 
 
 
99e1a1c
274de8d
e1eb3b2
 
24b052a
99e1a1c
24b052a
 
99e1a1c
24b052a
1f9fe23
99e1a1c
 
 
 
 
 
 
87ff579
274de8d
99e1a1c
 
 
274de8d
99e1a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87ff579
99e1a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274de8d
99e1a1c
 
 
 
 
 
 
 
274de8d
99e1a1c
 
 
 
 
 
 
 
274de8d
99e1a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e1eb3b2
 
99e1a1c
 
 
 
87ff579
 
 
 
 
 
99e1a1c
87ff579
 
 
 
1f9fe23
99e1a1c
1f9fe23
3eb90d8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
import streamlit as st
from PyPDF2 import PdfReader
from io import BytesIO 
import os
import tempfile
import shutil

# Fixed imports for LangChain
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI

# --- Get API key from Hugging Face Secrets ---
# Read from the GOOGLE_API_KEY environment variable; defaults to an empty
# string when the variable is unset, which main() treats as "no key found".
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

# Use temporary directory for Hugging Face Spaces
# The FAISS index is saved to (and loaded from) a "faiss_index" entry under
# the system temporary directory.
TEMP_DIR = tempfile.gettempdir()
FAISS_INDEX_PATH = os.path.join(TEMP_DIR, "faiss_index")

# PDF file path (relative, so it is resolved against the process's current
# working directory)
PDF_FILE_PATH = "./slide.pdf"

# Current GA embedding model (legacy models have been shut down)
# Used both when building the index and when loading it for queries.
EMBEDDING_MODEL = "models/gemini-embedding-001"


def get_pdf_text_from_file(pdf_path):
    """Return the concatenated text of every page of the PDF at ``pdf_path``.

    Page texts are joined in page order with no separator; pages whose
    extraction yields nothing (``None`` or an empty string) are skipped.

    Raises:
        FileNotFoundError: if ``pdf_path`` does not exist.
    """
    # Fail early with a clear message before handing the path to the reader.
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"PDF file not found: {pdf_path}")

    extracted_pages = (page.extract_text() for page in PdfReader(pdf_path).pages)
    return "".join(piece for piece in extracted_pages if piece)


def get_text_chunks(text):
    """Split ``text`` into chunks for embedding.

    Uses a recursive character splitter configured with a chunk size of
    10000 and an overlap of 1000, and returns whatever ``split_text`` yields.
    """
    splitter_options = {"chunk_size": 10000, "chunk_overlap": 1000}
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = chunker.split_text(text)
    return chunks


def get_vector_store(text_chunks, api_key):
    """Embed ``text_chunks`` and persist a fresh FAISS index to disk.

    Any existing index directory at ``FAISS_INDEX_PATH`` is removed first so
    that an index built with a different embedding model (and therefore a
    different vector dimension) is never reused. Returns ``None``.
    """
    stale_index_present = os.path.exists(FAISS_INDEX_PATH)
    if stale_index_present:
        shutil.rmtree(FAISS_INDEX_PATH)

    embedder = GoogleGenerativeAIEmbeddings(
        model=EMBEDDING_MODEL,
        google_api_key=api_key,
    )
    FAISS.from_texts(text_chunks, embedding=embedder).save_local(FAISS_INDEX_PATH)


def _format_chat_history(chat_history, user_question, max_messages=10):
    """Render the most recent *prior* messages as "User:/Assistant:" lines.

    Callers append the current user message to the history before asking for
    a response. That trailing entry is dropped here so the question is not
    repeated in the prompt (it is already inserted under "Current Question")
    and does not use up one of the ``max_messages`` history slots.
    """
    prior_messages = list(chat_history or [])
    if prior_messages:
        latest = prior_messages[-1]
        if latest["role"] == "user" and latest["content"] == user_question:
            prior_messages.pop()

    rendered = []
    for msg in prior_messages[-max_messages:]:
        role = "User" if msg["role"] == "user" else "Assistant"
        rendered.append(f"{role}: {msg['content']}\n")
    return "".join(rendered)


def get_response(user_question, api_key, chat_history):
    """Answer ``user_question`` from the indexed PDF, with chat history as context.

    Loads the saved FAISS index, retrieves the 4 chunks most similar to the
    question, builds a single prompt from the rules, the recent chat history
    and the retrieved context, and returns the model's reply text.

    Args:
        user_question: The question (or MCQ answer) typed by the user.
        api_key: Google API key used for both embeddings and the chat model.
        chat_history: List of ``{"role": ..., "content": ...}`` dicts. It may
            already end with the current user message; that entry is not
            repeated in the history section of the prompt.

    Returns:
        The ``content`` of the chat model's response.
    """
    embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL, google_api_key=api_key)
    # SECURITY NOTE: load_local unpickles data from FAISS_INDEX_PATH. The index
    # is written by get_vector_store() in this same app, but it lives in the
    # system temp directory; do not point this at an index from an untrusted
    # source.
    new_db = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question, k=4)

    # Format chat history for context (last 10 messages before the current one)
    history_text = _format_chat_history(chat_history, user_question)

    # Combine document contents
    context = "\n\n".join(doc.page_content for doc in docs)

    prompt_template = f"""
    You are a helpful assistant for Antimicrobial Pharmacology. You answer questions based ONLY on the context provided from the PDF documents.
    
    IMPORTANT RULES:
    1. Do not use any external knowledge or assumptions.
    2. If the answer is not found in the context, reply with "I don't know based on the provided materials."
    3. Be conversational and helpful.
    4. When generating MCQs, create questions that test understanding of the material.
    5. When checking answers, be encouraging and provide explanations.
    6. IMPORTANT: When referencing information from the course materials, always say "your professor says" or "according to your professor" instead of "the text states", "the document states", "the text says", or similar phrases. This makes the learning experience more personal and connected to the course.
    
    Chat History:
    {history_text}
    
    Context from PDF:
    {context}
    
    Current Question:
    {user_question}
    
    Instructions:
    - If the user asks for a multiple choice question (MCQ), quiz, or test question:
      * Generate a question with 4 options (A, B, C, D) based ONLY on the context
      * Make sure the question tests important concepts from the material
      * Do NOT reveal the correct answer yet
      * Ask the user to select their answer
    
    - If the user provides an answer (like "A", "B", "C", "D" or the full answer text) AND there was a recent MCQ in the chat history:
      * Check if the answer is correct based on the context
      * If correct: Congratulate them and explain why it's correct, referencing what "your professor says" in the materials
      * If incorrect: Encourage them, reveal the correct answer, and explain why using "your professor says" to reference the source material
    
    - For regular questions: Answer based on the context provided, using "your professor says" when citing information
    
    Remember: Never use phrases like "the text states", "the document says", "according to the text", etc. Always use "your professor says" or "according to your professor" instead.
    
    Answer:
    """

    model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.3, google_api_key=api_key)
    response = model.invoke(prompt_template)
    return response.content


def _run_quick_action(question, spinner_text, api_key):
    """Send a canned quick-start question, store the exchange, and rerun.

    Mirrors the chat-input path: if getting the answer fails, the error text
    is stored as the assistant's reply instead of letting the exception escape
    and leaving the user message without a response.
    """
    messages = st.session_state["messages"]
    messages.append({"role": "user", "content": question})
    with st.spinner(spinner_text):
        try:
            reply = get_response(question, api_key, messages)
        except Exception as e:
            reply = f"Error getting answer: {str(e)}"
        messages.append({"role": "assistant", "content": reply})
    # Rerun outside the try block so the new messages are drawn by the normal
    # history loop and the quick-start buttons disappear.
    st.rerun()


def main():
    """Run the Streamlit app: obtain an API key, index the PDF, then chat.

    The script is re-executed on every interaction, so progress is tracked in
    ``st.session_state``:

    * ``api_entered`` / ``user_api_key`` - whether a key is available and its value.
    * ``pdf_processed`` - whether the FAISS index has been built this session.
    * ``messages`` - the chat transcript as ``{"role", "content"}`` dicts.
    """
    st.set_page_config(
        page_title="Antimicrobial Pharmacology Chatbot",
        page_icon="💊",
        layout="wide"
    )

    st.header("Antimicrobial Pharmacology Chatbot (RX24)")
    st.markdown("---")

    # Initialize session state
    if "api_entered" not in st.session_state:
        st.session_state["api_entered"] = False
    if "pdf_processed" not in st.session_state:
        st.session_state["pdf_processed"] = False
    if "messages" not in st.session_state:
        st.session_state["messages"] = []

    # Check for API key
    api_key = GOOGLE_API_KEY

    # STEP 1: API Key handling
    if not st.session_state["api_entered"]:
        if not api_key:
            # No key in the environment: ask the user and halt this run until
            # one has been submitted.
            st.warning("Google API Key not found in environment variables.")
            st.info("Please add GOOGLE_API_KEY to your Hugging Face Space secrets or enter it below.")
            user_api_key = st.text_input(
                "Enter your Gemini API key",
                type="password",
                help="Get your API key from https://makersuite.google.com/app/apikey"
            )
            if st.button("Continue", type="primary") and user_api_key:
                st.session_state["user_api_key"] = user_api_key
                st.session_state["api_entered"] = True
                st.rerun()
            st.stop()
        else:
            st.session_state["user_api_key"] = api_key
            st.session_state["api_entered"] = True

    api_key = st.session_state.get("user_api_key", "")

    # STEP 2: Auto-process PDF file
    if not st.session_state["pdf_processed"]:
        st.subheader("Processing Antimicrobial Pharmacology Course")

        with st.spinner("Processing Antimicrobial Pharmacology Course... This may take a moment."):
            try:
                raw_text = get_pdf_text_from_file(PDF_FILE_PATH)
                if not raw_text.strip():
                    st.error("No text could be extracted from the PDF. Please check your file.")
                    st.stop()

                st.info(f"Processing: {PDF_FILE_PATH}")

                text_chunks = get_text_chunks(raw_text)
                get_vector_store(text_chunks, api_key)
                st.session_state["pdf_processed"] = True
                st.success("PDF processed successfully. You can now ask questions.")
                st.rerun()
            except (st.runtime.scriptrunner.RerunException, st.runtime.scriptrunner.StopException):
                # Streamlit's own control-flow signals must propagate untouched.
                raise
            except FileNotFoundError as e:
                st.error(str(e))
                st.stop()
            except Exception as e:
                st.error(f"Error processing PDF: {str(e)}")
                st.stop()

    # STEP 3: Chat Interface
    # Sidebar with options
    with st.sidebar:
        st.subheader("Options")
        st.info("Loaded: Antimicrobial Pharmacology Course")

        if st.button("Reprocess PDF"):
            # Clearing the flag makes STEP 2 rebuild the index on the next run.
            st.session_state["pdf_processed"] = False
            st.rerun()

        if st.button("Clear Chat History"):
            st.session_state["messages"] = []
            st.rerun()

        st.markdown("---")
        st.subheader("Quick Actions")
        st.markdown("""
        Try asking:
        - "Give me a multiple choice question"
        - "Quiz me on antibiotics"
        - "Generate an MCQ about [topic]"
        - "What are the main topics?"
        """)

        st.markdown("---")
        st.subheader("How to use MCQs")
        st.markdown("""
        1. Ask for an MCQ (e.g., "Give me a quiz question")
        2. The bot will generate a question with options A-D
        3. Reply with your answer (e.g., "A" or "The answer is B")
        4. Get feedback on whether you're correct
        """)

    # Main chat area
    st.subheader("Chat with your Pharmacology Assistant")

    # Display chat messages
    for message in st.session_state["messages"]:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    # Quick action buttons (only show if no messages yet)
    if not st.session_state["messages"]:
        st.markdown("### Quick Start")
        # (button label, question sent to the model, spinner text)
        quick_actions = (
            (
                "Generate MCQ",
                "Give me a multiple choice question to test my knowledge",
                "Generating question...",
            ),
            (
                "Summarize Topics",
                "What are the main topics covered in this material?",
                "Analyzing...",
            ),
            (
                "How can you help?",
                "What can you help me with regarding this pharmacology material?",
                "Processing...",
            ),
        )
        columns = st.columns(len(quick_actions))
        for column, (label, quick_question, spinner_text) in zip(columns, quick_actions):
            with column:
                if st.button(label, use_container_width=True):
                    _run_quick_action(quick_question, spinner_text, api_key)

    # Chat input
    if user_question := st.chat_input("Ask a question or answer an MCQ..."):
        # Add user message to chat history
        st.session_state["messages"].append({"role": "user", "content": user_question})

        # Display user message
        with st.chat_message("user"):
            st.markdown(user_question)

        # Generate and display assistant response
        with st.chat_message("assistant"):
            with st.spinner("Processing..."):
                try:
                    response = get_response(user_question, api_key, st.session_state["messages"])
                    st.markdown(response)
                    st.session_state["messages"].append({"role": "assistant", "content": response})
                except (st.runtime.scriptrunner.RerunException, st.runtime.scriptrunner.StopException):
                    raise
                except Exception as e:
                    # Show the failure in the chat and keep it in the transcript.
                    error_msg = f"Error getting answer: {str(e)}"
                    st.error(error_msg)
                    st.session_state["messages"].append({"role": "assistant", "content": error_msg})

    # Add footer
    st.markdown("---")
    st.markdown(
        """
        <div style='text-align: center'>
            <small>Antimicrobial Pharmacology Chatbot - Powered by Gemini AI</small>
        </div>
        """, 
        unsafe_allow_html=True
    )


# Start the app when this file is executed as a script rather than imported.
if __name__ == "__main__":
    main()