# Hugging Face Space (status at capture time: Sleeping)
# app_hf_space.py
"""Streamlit RAG app for Hugging Face Spaces: FAISS retrieval + a local HF pipeline LLM."""
import streamlit as st
from langchain.chains import RetrievalQA
from langchain_community.vectorstores.faiss import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_huggingface.llms import HuggingFacePipeline
# --- Configuration ---
# Model IDs are resolved by the langchain-huggingface integrations below.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"  # sentence-embedding model
LLM_MODEL_ID = "google/flan-t5-small"  # small seq2seq model for answer generation
INDEX_DIR = "faiss_index"  # directory holding the pre-built FAISS index
# 1. Embeddings — must be the same model the FAISS index was built with,
#    or retrieval similarity scores are meaningless.
#    NOTE: the correct keyword is `cache_folder`; the original `cache_dir`
#    is not a HuggingFaceEmbeddings parameter and fails validation.
embeddings = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL_ID,
    cache_folder=".hf_cache",
)
# 2. Load the persisted FAISS index from disk.
#    load_local() deserializes a pickle, so recent langchain versions refuse
#    to load it unless the caller opts in explicitly. This is only safe here
#    because the index is an artifact we built ourselves, not untrusted input.
store = FAISS.load_local(
    INDEX_DIR,
    embeddings,
    allow_dangerous_deserialization=True,
)
# 3. LLM: from_model_id() downloads the model and runs a *local*
#    transformers pipeline inside the Space (despite the original comment,
#    this is not the hosted Inference API).
llm = HuggingFacePipeline.from_model_id(
    model_id=LLM_MODEL_ID,
    task="text2text-generation",
)
# 4. RetrievalQA with the "stuff" strategy: all retrieved documents are
#    concatenated into a single prompt for the LLM.
aqa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=store.as_retriever(),
)
# 5. Streamlit UI
st.title("🦜🔗 RAG App via HF Spaces")  # title emoji repaired from mojibake in source
query = st.text_input("Ask a question about your docs:")
if query:
    with st.spinner("Generating answer via HF Space..."):
        # Chain.run() is deprecated in langchain >= 0.1; invoke() returns a
        # dict of the form {"query": ..., "result": ...}.
        result = aqa_chain.invoke({"query": query})
    st.markdown(f"**Answer:** {result['result']}")

# NOTE: Deploy this to Hugging Face Spaces for fully-managed hosting.
# Just push this file to your repo on HF and enable Streamlit space.