broadfield-dev committed on
Commit
8b13354
·
verified ·
1 Parent(s): 80d005a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -13
app.py CHANGED
@@ -1,34 +1,105 @@
1
- from flask import Flask, render_template, request
 
 
2
  from datasets import load_dataset
3
  import torch
4
  from transformers import AutoTokenizer, AutoModel
5
  import numpy as np
 
6
 
7
  # --- 1. Initialize Flask App ---
8
  app = Flask(__name__)
 
 
9
 
10
- # --- 2. Load Models and Dataset (Done once on startup) ---
11
- print("Loading models and dataset...")
12
- # Point this to your Hugging Face Dataset repository
13
- DATASET_REPO = "YourUsername/bible-rag-gemma-with-faiss"
14
- MODEL_NAME = "google/embeddinggemma-300m"
15
 
16
- # Load the pre-built dataset and FAISS index
17
- rag_dataset = load_dataset(DATASET_REPO)['train']
 
 
18
 
19
- # Load the Gemma model and tokenizer
20
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
21
- embedding_model = AutoModel.from_pretrained(MODEL_NAME)
22
- print("Models and dataset loaded successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # --- 3. Define App Routes ---
25
 
26
  @app.route('/')
27
  def home():
 
 
28
  return render_template('index.html')
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  @app.route('/search', methods=['POST'])
31
  def search():
 
 
 
 
 
 
 
 
 
32
  user_query = request.form['query']
33
  if not user_query:
34
  return render_template('index.html', results=[])
@@ -39,7 +110,6 @@ def search():
39
  outputs = embedding_model(**inputs)
40
  query_embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
41
 
42
- # FAISS expects a flattened numpy array
43
  query_embedding = np.float32(query_embedding)
44
 
45
  # --- Search the FAISS index ---
 
import os
import subprocess
import sys

from flask import Flask, render_template, request, flash, redirect, url_for
from datasets import load_dataset
import torch
from transformers import AutoTokenizer, AutoModel
import numpy as np

# --- 1. Initialize Flask App ---
app = Flask(__name__)
# Flask sessions (used by flash()) require a secret key. A random key is
# generated on every startup, so flashed messages do not survive a restart.
app.secret_key = os.urandom(24)

# --- 2. Configuration & Resource Loading ---
print("Starting application...")

# Point this to the Hugging Face Dataset repository you want to create/use.
# This MUST match the DATASET_REPO in build_rag.py
DATASET_REPO = "broadfield-dev/bible-rag-dataset-gemma"
MODEL_NAME = "google/gemma-2b" # Use a consistent model for embedding and searching

# Module-level handles for the dataset and models; populated lazily by
# load_resources() so the app can still start when the dataset is missing.
rag_dataset = None
tokenizer = None
embedding_model = None
28
def load_resources():
    """
    Attempt to load the RAG dataset and the embedding model/tokenizer from
    the Hugging Face Hub, caching them in the module-level globals.

    Returns:
        bool: True when everything is available, False when loading failed
        (e.g. the dataset repository does not exist yet).
    """
    global rag_dataset, tokenizer, embedding_model
    # Identity check, not truthiness: a datasets.Dataset defines __len__, so a
    # successfully loaded but empty dataset would be falsy and trigger a
    # pointless (and slow) reload on every call.
    if rag_dataset is not None:
        return True

    print(f"Attempting to load resources: {DATASET_REPO} and {MODEL_NAME}")
    try:
        # Load the pre-built dataset with the FAISS index
        rag_dataset = load_dataset(DATASET_REPO)['train']

        # Load the Gemma model and tokenizer
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        embedding_model = AutoModel.from_pretrained(MODEL_NAME)

        print("Models and dataset loaded successfully!")
        return True
    except Exception as e:
        # Broad catch is deliberate: any Hub/network/auth failure should leave
        # the app running in its "not ready" state rather than crash startup.
        print(f"Could not load RAG dataset from '{DATASET_REPO}'. It may not exist yet.")
        print(f"Error: {e}")
        # Reset globals to ensure a clean state
        rag_dataset = None
        tokenizer = None
        embedding_model = None
        return False

# Try to load resources on startup. The app can still run if this fails.
resources_loaded = load_resources()
59
 
60
  # --- 3. Define App Routes ---
61
 
62
@app.route('/')
def home():
    """Render the landing page, warning the user when the RAG dataset is unavailable."""
    if resources_loaded:
        return render_template('index.html')
    # Dataset failed to load at startup: show a banner pointing at the build button.
    flash(f"Welcome! The required RAG dataset '{DATASET_REPO}' is not loaded. Please use the 'Build RAG Dataset' button to create and upload it.", "warning")
    return render_template('index.html')
67
 
68
@app.route('/build-rag', methods=['POST'])
def build_rag_route():
    """
    Trigger the build_rag.py script as a background process.

    NOTE: This requires a Hugging Face token with 'write' permissions
    to be saved as a secret named HF_TOKEN in the Space settings.

    Returns:
        A redirect to the home page with a flashed status message.
    """
    print("RAG build process requested.")
    try:
        # Use Popen to run the script in the background without blocking the app.
        # Let the child inherit this process's stdout/stderr: the previous
        # version piped output to subprocess.PIPE without ever reading it,
        # which deadlocks the build once the OS pipe buffer fills. Inheriting
        # also makes progress actually visible in the Space logs, as the
        # flash message below promises.
        process = subprocess.Popen([sys.executable, "build_rag.py"])
        print(f"Started build process with PID: {process.pid}")
        flash("RAG build process initiated! This will run in the background and can take several minutes. Please check the Space logs for progress. Once complete, you can start searching.", "info")
    except Exception as e:
        print(f"Failed to start build process: {e}")
        flash(f"An error occurred while trying to start the build process: {e}", "error")

    return redirect(url_for('home'))
91
+
92
  @app.route('/search', methods=['POST'])
93
  def search():
94
+ global resources_loaded
95
+ # If resources weren't loaded, try again in case the build just finished.
96
+ if not resources_loaded:
97
+ print("Resources not loaded. Attempting to reload for search...")
98
+ resources_loaded = load_resources()
99
+ if not resources_loaded:
100
+ flash("The RAG dataset is not ready yet. Please wait for the build process to complete or check the logs for errors.", "error")
101
+ return redirect(url_for('home'))
102
+
103
  user_query = request.form['query']
104
  if not user_query:
105
  return render_template('index.html', results=[])
 
110
  outputs = embedding_model(**inputs)
111
  query_embedding = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
112
 
 
113
  query_embedding = np.float32(query_embedding)
114
 
115
  # --- Search the FAISS index ---