# Source: Hugging Face Space file "RAG / app_hf_space.py"
# (uploaded by lantzmurray, commit 0670049, verified)
# app_hf_space.py
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import RetrievalQA
from langchain_huggingface.llms import HuggingFacePipeline
# --- Configuration ---
# Embedding + LLM models are loaded locally via `langchain-huggingface`.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_ID = "google/flan-t5-small"
INDEX_DIR = "faiss_index"


@st.cache_resource(show_spinner=False)
def _load_qa_chain():
    """Build the RetrievalQA chain once and cache it for the app's lifetime.

    Streamlit re-executes this script on every user interaction; without
    ``st.cache_resource`` the embedding model, FAISS index, and LLM would be
    reloaded from disk on each rerun.

    Returns:
        A LangChain ``RetrievalQA`` chain wired to the local FAISS retriever.
    """
    # 1. Sentence-transformer embeddings.
    # FIX: the parameter is `cache_folder`, not `cache_dir` — `cache_dir`
    # is rejected by HuggingFaceEmbeddings and crashed the app at startup.
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBED_MODEL_ID,
        cache_folder=".hf_cache",
    )
    # 2. Load the prebuilt FAISS index.
    # FIX: recent LangChain versions refuse to unpickle an index unless
    # deserialization is explicitly allowed. Safe here because the index
    # in INDEX_DIR is produced by our own build step, not untrusted input.
    store = FAISS.load_local(
        INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    # 3. Local seq2seq LLM via a transformers pipeline.
    llm = HuggingFacePipeline.from_model_id(
        model_id=LLM_MODEL_ID,
        task="text2text-generation",
    )
    # 4. "stuff" chain: all retrieved docs are inserted into a single prompt.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )


qa_chain = _load_qa_chain()

# 5. Streamlit UI
# FIX: title emoji was mojibake ("πŸ¦œπŸ”—" — UTF-8 bytes decoded as cp1252);
# restored to the intended LangChain parrot/link emoji pair.
st.title("🦜🔗 RAG App via HF Spaces")
query = st.text_input("Ask a question about your docs:")
if query:
    with st.spinner("Generating answer via HF Space..."):
        # `.run` is deprecated in LangChain 0.1+. `.invoke` returns a dict
        # keyed by the chain's output key ("result" for RetrievalQA).
        answer = qa_chain.invoke({"query": query})["result"]
    st.markdown(f"**Answer:** {answer}")

# NOTE: Deploy this to Hugging Face Spaces for fully-managed hosting.
# Just push this file to your repo on HF and enable Streamlit space.