# Source: Hugging Face Space file "RAG / app_hf_space.py"
# (uploaded by lantzmurray, commit 0670049, verified)
# app_hf_space.py
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import RetrievalQA
from langchain_huggingface.llms import HuggingFacePipeline
# --- Configuration ---
# Embedding + LLM models are loaded locally via `langchain-huggingface`.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_ID = "google/flan-t5-small"
INDEX_DIR = "faiss_index"


@st.cache_resource(show_spinner=False)
def _load_qa_chain():
    """Build the RetrievalQA chain once and cache it for the app's lifetime.

    Streamlit re-executes this script on every user interaction; without
    ``st.cache_resource`` the embedding model, FAISS index, and LLM would be
    reloaded from disk on each rerun.

    Returns:
        A LangChain ``RetrievalQA`` chain wired to the local FAISS retriever.
    """
    # 1. Sentence-transformer embeddings.
    # FIX: the parameter is `cache_folder`, not `cache_dir` — `cache_dir`
    # is rejected by HuggingFaceEmbeddings and crashed the app at startup.
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBED_MODEL_ID,
        cache_folder=".hf_cache",
    )
    # 2. Load the prebuilt FAISS index.
    # FIX: recent LangChain versions refuse to unpickle an index unless
    # deserialization is explicitly allowed. Safe here because the index
    # in INDEX_DIR is produced by our own build step, not untrusted input.
    store = FAISS.load_local(
        INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )
    # 3. Local seq2seq LLM via a transformers pipeline.
    llm = HuggingFacePipeline.from_model_id(
        model_id=LLM_MODEL_ID,
        task="text2text-generation",
    )
    # 4. "stuff" chain: all retrieved docs are inserted into a single prompt.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )


qa_chain = _load_qa_chain()

# 5. Streamlit UI
# FIX: title emoji was mojibake ("πŸ¦œπŸ”—" — UTF-8 bytes decoded as cp1252);
# restored to the intended LangChain parrot/link emoji pair.
st.title("🦜🔗 RAG App via HF Spaces")
query = st.text_input("Ask a question about your docs:")
if query:
    with st.spinner("Generating answer via HF Space..."):
        # `.run` is deprecated in LangChain 0.1+. `.invoke` returns a dict
        # keyed by the chain's output key ("result" for RetrievalQA).
        answer = qa_chain.invoke({"query": query})["result"]
    st.markdown(f"**Answer:** {answer}")

# NOTE: Deploy this to Hugging Face Spaces for fully-managed hosting.
# Just push this file to your repo on HF and enable Streamlit space.