# app_hf_space.py
"""Streamlit RAG app: FAISS retrieval + a local HF pipeline LLM via LangChain."""
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import RetrievalQA
from langchain_huggingface.llms import HuggingFacePipeline

# --- Configuration ---
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_ID = "google/flan-t5-small"
INDEX_DIR = "faiss_index"


@st.cache_resource  # heavy objects: load once per process, not on every Streamlit rerun
def _load_qa_chain() -> RetrievalQA:
    """Build and return the RetrievalQA chain (embeddings -> FAISS retriever -> LLM)."""
    # 1. Embeddings (downloaded and run locally via sentence-transformers).
    # FIX: the constructor keyword is `cache_folder`, not `cache_dir`.
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBED_MODEL_ID,
        cache_folder=".hf_cache",
    )

    # 2. Load the FAISS index.
    # FIX: langchain-community now requires an explicit opt-in because the
    # index is pickle-backed; this is safe only because the index is built
    # locally by our own ingestion step, never from untrusted uploads.
    store = FAISS.load_local(
        INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    # 3. LLM via a local transformers pipeline.
    # NOTE(review): despite the original comments, `from_model_id` runs the
    # model in-process — it is NOT the hosted HF Inference API.
    llm = HuggingFacePipeline.from_model_id(
        model_id=LLM_MODEL_ID,
        task="text2text-generation",
    )

    # 4. "stuff" chain: retrieved docs are concatenated into a single prompt.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )


qa_chain = _load_qa_chain()

# 5. Streamlit UI
st.title("🦜🔗 RAG App via HF Spaces")
query = st.text_input("Ask a question about your docs:")
if query:
    with st.spinner("Generating answer via HF Space..."):
        # FIX: Chain.run() is deprecated; invoke() returns a dict whose
        # "result" key holds the generated answer.
        answer = qa_chain.invoke({"query": query})["result"]
    st.markdown(f"**Answer:** {answer}")

# NOTE: Deploy this to Hugging Face Spaces for fully-managed hosting.
# Just push this file to your repo on HF and enable Streamlit space.