File size: 1,461 Bytes
0670049
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# app_hf_space.py
"""Streamlit RAG app: FAISS retrieval over local docs + Hugging Face models.

Deploy to Hugging Face Spaces (Streamlit space) for fully-managed hosting:
push this file to your HF repo and enable a Streamlit space.
"""
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import RetrievalQA
from langchain_huggingface.llms import HuggingFacePipeline

# --- Configuration ---
# Models are pulled from the Hugging Face Hub and run via local pipelines.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_ID = "google/flan-t5-small"
INDEX_DIR = "faiss_index"  # directory produced by FAISS.save_local()


@st.cache_resource(show_spinner="Loading models and index...")
def _load_qa_chain() -> RetrievalQA:
    """Build the RetrievalQA chain once per process.

    Streamlit reruns the entire script on every user interaction; without
    caching, the embedding model, FAISS index, and LLM pipeline would be
    reloaded on each keystroke. `st.cache_resource` memoizes the chain.

    Returns:
        A ready-to-invoke RetrievalQA chain ("stuff" strategy).
    """
    # 1. Embeddings. BUG FIX: the parameter is `cache_folder`, not
    #    `cache_dir` — the latter fails pydantic validation at startup.
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBED_MODEL_ID,
        cache_folder=".hf_cache",
    )

    # 2. Load FAISS index. BUG FIX: load_local deserializes via pickle,
    #    so current langchain raises ValueError unless the caller opts in
    #    explicitly. Acceptable here because we built the index ourselves;
    #    never enable this flag for indexes from untrusted sources.
    store = FAISS.load_local(
        INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    # 3. LLM via a local HF pipeline (flan-t5 is a text2text model).
    llm = HuggingFacePipeline.from_model_id(
        model_id=LLM_MODEL_ID,
        task="text2text-generation",
    )

    # 4. "stuff" chain: retrieved docs are concatenated into one prompt.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )


# 5. Streamlit UI
st.title("🦜🔗 RAG App via HF Spaces")
query = st.text_input("Ask a question about your docs:")

if query:
    qa_chain = _load_qa_chain()
    with st.spinner("Generating answer via HF Space..."):
        # `.run()` is deprecated in langchain>=0.1; `.invoke()` returns a
        # dict keyed by the chain's output key ("result" for RetrievalQA).
        response = qa_chain.invoke({"query": query})
    st.markdown(f"**Answer:** {response['result']}")

# NOTE: Deploy this to Hugging Face Spaces for fully-managed hosting.
# Just push this file to your repo on HF and enable Streamlit space.