Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,124 +1,124 @@
|
|
# Step 1: Import Libraries
# These are installed from the Space's requirements.txt.
import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import google.generativeai as genai
import pickle
import os

# Step 2: Configure API Key from Hugging Face Secrets
# Reads the key from the Space's secrets instead of hard-coding it.
try:
    # Make sure 'GOOGLE_API_KEY' is set in the Space's settings/secrets.
    GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
    if GOOGLE_API_KEY is None:
        raise ValueError("GOOGLE_API_KEY not found in environment variables.")
    genai.configure(api_key=GOOGLE_API_KEY)
    print("API Key configured successfully.")
except Exception as e:
    print(f"ERROR: Could not configure API key. Please ensure 'GOOGLE_API_KEY' is set in your Hugging Face Space secrets. Details: {e}")

# Step 3: Define Data Path
# BUG FIX: the previous revision had `DATA_PATH = "` — an unterminated
# string literal that made the whole file a SyntaxError. The pre-built
# index and pickle live in the repo's "data" folder.
DATA_PATH = "data"
vector_store_file = os.path.join(DATA_PATH, "vector_store.index")
data_file = os.path.join(DATA_PATH, "data.pkl")

# Step 4: Load Models and Pre-processed Data
# Models are loaded only when the pre-processed data is actually present.
vector_store_data = None  # (index, texts, sources) once loading succeeds
if os.path.exists(vector_store_file) and os.path.exists(data_file):
    try:
        print("Loading pre-processed data from the repository...")
        index = faiss.read_index(vector_store_file)
        # NOTE(review): unpickling is only safe because data.pkl is part of
        # this repository, not untrusted user input.
        with open(data_file, "rb") as f:
            stored_data = pickle.load(f)
        texts = stored_data["texts"]
        sources = stored_data["sources"]
        print("✅ Data loaded successfully.")

        # Keep everything the chat function needs in one tuple.
        vector_store_data = (index, texts, sources)

        # Load the embedding model and the LLM.
        print("Loading AI and embedding models...")
        embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
        # NOTE(review): the 'gemini-pro' model id has been retired by Google;
        # confirm against the current google-generativeai model list.
        llm = genai.GenerativeModel('gemini-pro')
        print("✅ Models loaded successfully.")
    except Exception as e:
        print(f"❌ ERROR: An error occurred during data or model loading: {e}")
        vector_store_data = None
else:
    print("❌ ERROR: Pre-processed data not found in the repository.")
    print(f"Please make sure '{vector_store_file}' and '{data_file}' exist in the '{DATA_PATH}' folder.")


# Step 5: RAG and Chat Functions
def get_relevant_context(query, index, top_k=5):
    """Return the top_k stored chunks most similar to *query*.

    Embeds the query with the module-level ``embedding_model`` and searches
    the given FAISS ``index``; results are dicts with ``text``/``source``
    keys drawn from the module-level ``texts``/``sources`` lists.
    """
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    context = []
    for i in indices[0]:
        # FAISS pads with -1 when fewer than top_k vectors exist; skip
        # sentinel/out-of-range ids instead of indexing from the end.
        if 0 <= i < len(texts):
            context.append({"text": texts[i], "source": sources[i]})
    return context


def chat_with_rag(message, history, vector_store_data):
    """Answer *message* using retrieved lab-paper context plus the LLM.

    ``history`` is accepted for the Gradio callback signature but unused.
    Returns the model's answer text, or an error string on failure.
    """
    index, texts, sources = vector_store_data
    relevant_context = get_relevant_context(message, index)
    context_str = "\n\n".join([f"Source: {c['source']}\nContent: {c['text']}" for c in relevant_context])

    prompt = f"""
You are an Expert scientist in the Halassa Lab at MIT, an expert in computational neuroscience.
Your primary goal is to answer questions as thoroughly and accurately as possible. You may intelligently synthesize information from the provided context, which consists of key papers from the lab.

Follow these rules strictly if you are using the provided context:
1. Do not simply copy-paste from the context. Read the relevant passages and formulate a comprehensive, well-written answer in your own words.
2. When your answer uses information directly from a provided paper, you MUST cite the source at the end of the output in a list. Use the format [filename - Page X].
3. If the provided papers offer relevant concepts but do not contain the full answer, use your broader knowledge of computational neuroscience to provide a more complete explanation.

Context from the Halassa Lab's papers:
---
{context_str}
---

User Question: {message}

Expert Answer:
"""
    try:
        response = llm.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"An error occurred with the AI model: {str(e)}"


# Step 6: Gradio User Interface
with gr.Blocks(theme=gr.themes.Soft(), title="Halassa Literature Chatbot") as demo:
    gr.Markdown("# Halassa Lab Onboarder")
    if vector_store_data is None:
        gr.Markdown("## ⚠️ Error: Could not load data or models. Please check the logs in the Hugging Face Space for details.")
    else:
        gr.Markdown("The documents have been pre-loaded. Ask your questions below.")
        chatbot_ui = gr.Chatbot(label="Chat History", height=600, layout="panel")
        message_box = gr.Textbox(label="Ask your question...", lines=3)
        clear_button = gr.ClearButton(components=[chatbot_ui, message_box])

        def respond(message, history):
            # Pass the loaded vector_store_data to the chat function.
            response_text = chat_with_rag(message, history, vector_store_data)
            history.append((message, response_text))
            return "", history

        message_box.submit(respond, inputs=[message_box, chatbot_ui], outputs=[message_box, chatbot_ui])

# Step 7: Launch the app
# BUG FIX: launching only when vector_store_data was truthy meant a failed
# load left the Space serving nothing ("Runtime error"). Launch always so
# the error banner built above is actually shown.
demo.launch()
|
|
|
# Step 1: Import Libraries
# These are installed from the Space's requirements.txt.
import gradio as gr
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import google.generativeai as genai
import pickle
import os

# Step 2: Configure API Key from Hugging Face Secrets
# Reads the key from the Space's secrets instead of hard-coding it.
try:
    # Make sure 'GOOGLE_API_KEY' is set in the Space's settings/secrets.
    GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
    if GOOGLE_API_KEY is None:
        raise ValueError("GOOGLE_API_KEY not found in environment variables.")
    genai.configure(api_key=GOOGLE_API_KEY)
    print("API Key configured successfully.")
except Exception as e:
    print(f"ERROR: Could not configure API key. Please ensure 'GOOGLE_API_KEY' is set in your Hugging Face Space secrets. Details: {e}")

# Step 3: Define Data Path
# Points at the pre-processed data folder uploaded to the Space repo
# (no longer Google Drive).
DATA_PATH = "data"
vector_store_file = os.path.join(DATA_PATH, "vector_store.index")
data_file = os.path.join(DATA_PATH, "data.pkl")

# Step 4: Load Models and Pre-processed Data
# Models are loaded only when the pre-processed data is actually present.
vector_store_data = None  # (index, texts, sources) once loading succeeds
if os.path.exists(vector_store_file) and os.path.exists(data_file):
    try:
        print("Loading pre-processed data from the repository...")
        index = faiss.read_index(vector_store_file)
        # NOTE(review): unpickling is only safe because data.pkl is part of
        # this repository, not untrusted user input.
        with open(data_file, "rb") as f:
            stored_data = pickle.load(f)
        texts = stored_data["texts"]
        sources = stored_data["sources"]
        # Mojibake fix: the status strings had been mangled ("β ..."); the
        # intended ✅/❌ marks are restored here.
        print("✅ Data loaded successfully.")

        # Keep everything the chat function needs in one tuple for Gradio.
        vector_store_data = (index, texts, sources)

        # Load the embedding model and the LLM.
        print("Loading AI and embedding models...")
        embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
        # NOTE(review): the 'gemini-pro' model id has been retired by Google;
        # confirm against the current google-generativeai model list.
        llm = genai.GenerativeModel('gemini-pro')
        print("✅ Models loaded successfully.")
    except Exception as e:
        print(f"❌ ERROR: An error occurred during data or model loading: {e}")
        vector_store_data = None
else:
    print("❌ ERROR: Pre-processed data not found in the repository.")
    print(f"Please make sure '{vector_store_file}' and '{data_file}' exist in the '{DATA_PATH}' folder.")
# Step 5: RAG retrieval helper
def get_relevant_context(query, index, top_k=5):
    """Return the top_k stored chunks most similar to *query*.

    Embeds the query with the module-level ``embedding_model`` and searches
    the given FAISS ``index``. Each hit is a ``{"text": ..., "source": ...}``
    dict drawn from the module-level ``texts``/``sources`` lists.

    Args:
        query: The user's question (plain string).
        index: A FAISS index whose vector ids align with ``texts``/``sources``.
        top_k: Maximum number of chunks to return.
    """
    query_embedding = embedding_model.encode([query])
    distances, indices = index.search(query_embedding, top_k)
    context = []
    for i in indices[0]:
        # BUG FIX: FAISS pads `indices` with -1 when the index holds fewer
        # than top_k vectors. The old check `i < len(texts)` let -1 through,
        # silently returning the *last* chunk. Skip sentinel/out-of-range ids.
        if 0 <= i < len(texts):
            context.append({"text": texts[i], "source": sources[i]})
    return context
def chat_with_rag(message, history, vector_store_data):
    """Answer *message* with retrieved lab-paper context plus the LLM.

    ``history`` is accepted to match the Gradio callback signature but is
    not consulted. Returns the model's answer text, or an error string if
    the generation call fails.
    """
    index, texts, sources = vector_store_data

    # Retrieve supporting passages and flatten them into one context blob.
    passages = get_relevant_context(message, index)
    context_block = "\n\n".join(
        [f"Source: {c['source']}\nContent: {c['text']}" for c in passages]
    )

    prompt = f"""
You are an Expert scientist in the Halassa Lab at MIT, an expert in computational neuroscience.
Your primary goal is to answer questions as thoroughly and accurately as possible. You may intelligently synthesize information from the provided context, which consists of key papers from the lab.

Follow these rules strictly if you are using the provided context:
1. Do not simply copy-paste from the context. Read the relevant passages and formulate a comprehensive, well-written answer in your own words.
2. When your answer uses information directly from a provided paper, you MUST cite the source at the end of the output in a list. Use the format [filename - Page X].
3. If the provided papers offer relevant concepts but do not contain the full answer, use your broader knowledge of computational neuroscience to provide a more complete explanation.

Context from the Halassa Lab's papers:
---
{context_block}
---

User Question: {message}

Expert Answer:
"""
    try:
        reply = llm.generate_content(prompt)
    except Exception as e:
        return f"An error occurred with the AI model: {str(e)}"
    return reply.text
# Step 6: Gradio User Interface
with gr.Blocks(theme=gr.themes.Soft(), title="Halassa Literature Chatbot") as demo:
    gr.Markdown("# Halassa Lab Onboarder")
    if vector_store_data is None:
        # Data/model loading failed at startup: show a visible error page.
        gr.Markdown("## ⚠️ Error: Could not load data or models. Please check the logs in the Hugging Face Space for details.")
    else:
        gr.Markdown("The documents have been pre-loaded. Ask your questions below.")
        chatbot_ui = gr.Chatbot(label="Chat History", height=600, layout="panel")
        message_box = gr.Textbox(label="Ask your question...", lines=3)
        clear_button = gr.ClearButton(components=[chatbot_ui, message_box])

        def respond(message, history):
            """Gradio submit handler: answer, append to history, clear box."""
            # Pass the loaded vector_store_data to the chat function.
            response_text = chat_with_rag(message, history, vector_store_data)
            history.append((message, response_text))
            return "", history

        message_box.submit(respond, inputs=[message_box, chatbot_ui], outputs=[message_box, chatbot_ui])

# Step 7: Launch the app
# BUG FIX: the old `if vector_store_data: demo.launch()` meant that when
# loading failed the Space served nothing at all and surfaced as a bare
# "Runtime error". Launch unconditionally so the error banner built above
# is actually shown to the user; on Hugging Face, demo.launch() is all
# that's needed.
demo.launch()