# RAG chain for querying sermon transcripts: Gemini LLM over a hybrid
# (Chroma semantic + BM25 keyword) retriever.
import os
import pickle
import sys
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_google_genai import HarmBlockThreshold, HarmCategory
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain_chroma import Chroma
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
# Load environment variables (GOOGLE_API_KEY) from a .env file, if present.
load_dotenv()
# --- NEW: GET ABSOLUTE PATHS ---
# This ensures Python finds files regardless of where you run the command from
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Persisted Chroma vector store directory (built by ingest.py).
DB_PATH = os.path.join(BASE_DIR, "branham_db")
# Pickled list of Document chunks used to build the BM25 keyword retriever.
CHUNKS_PATH = os.path.join(BASE_DIR, "sermon_chunks.pkl")
def get_rag_chain():
    """Initialize and return the RAG question-answering chain.

    Builds a hybrid retriever (Chroma semantic search + BM25 keyword
    search), wires it to a Gemini chat model with a persona prompt, and
    returns a ``RetrievalQA`` chain that also returns source documents.

    Returns:
        RetrievalQA: chain accepting ``{"query": ...}`` and returning the
        answer plus ``source_documents``.

    Raises:
        ValueError: if GOOGLE_API_KEY is not set in the environment.
        FileNotFoundError: if the vector DB or chunks pickle is missing.
        RuntimeError: if the chunks pickle exists but cannot be loaded.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY missing. Please set it in .env file.")

    # 1. Load Vector DB (Semantic Search)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
    if not os.path.exists(DB_PATH):
        raise FileNotFoundError(f"Database not found at {DB_PATH}. Run 'python ingest.py' first.")
    vector_db = Chroma(
        persist_directory=DB_PATH,
        embedding_function=embeddings,
        collection_name="branham_sermons"
    )
    vector_retriever = vector_db.as_retriever(search_kwargs={"k": 4})

    # 2. Load Keyword Retriever (BM25)
    # We stopped hiding the error. If it fails, we want to know WHY
    # (Corruption? Version mismatch?)
    if not os.path.exists(CHUNKS_PATH):
        raise FileNotFoundError(f"File not found: {CHUNKS_PATH}")
    try:
        # SECURITY NOTE: pickle.load executes arbitrary code if the file is
        # untrusted. Only load sermon_chunks.pkl produced by our own ingest.py.
        with open(CHUNKS_PATH, "rb") as f:
            chunks = pickle.load(f)
    except Exception as e:
        # Chain the cause so the ACTUAL error (and traceback) is preserved
        # when the file exists but fails to load.
        raise RuntimeError(f"Failed to load sermon_chunks.pkl. Error: {e}") from e
    if not chunks:
        # An empty pickle would make BM25Retriever fail with an opaque error.
        raise RuntimeError("sermon_chunks.pkl is empty. Re-run 'python ingest.py'.")
    keyword_retriever = BM25Retriever.from_documents(chunks)
    keyword_retriever.k = 4

    # 3. Hybrid Search: blend semantic (0.6) and keyword (0.4) scores.
    ensemble_retriever = EnsembleRetriever(
        retrievers=[vector_retriever, keyword_retriever],
        weights=[0.6, 0.4]
    )

    # 4. Gemini Model (The Voice)
    # Safety thresholds are relaxed to BLOCK_ONLY_HIGH because 1950s sermon
    # language can otherwise trip the default filters.
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-pro",
        temperature=0.3,
        google_api_key=api_key,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        }
    )

    # 5. The Persona Prompt
    template = """You are answering a question based ONLY on the sermon excerpts provided below.
INSTRUCTIONS:
- Use a humble, 1950s Southern preaching dialect.
- If the answer is not in the text, say: "Brother, I don't recall preaching specifically on that detail in these messages."
CONTEXT MESSAGES:
{context}
USER QUESTION: {question}
BROTHER BRANHAM'S REPLY:"""
    PROMPT = PromptTemplate(template=template, input_variables=["context", "question"])

    # "stuff" chain type: all retrieved chunks are concatenated into {context}.
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=ensemble_retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT}
    )
    return chain