# app_hf_space.py
"""Streamlit RAG app: FAISS retrieval + a local HF pipeline LLM via LangChain."""
import streamlit as st
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import RetrievalQA
from langchain_huggingface.llms import HuggingFacePipeline

# --- Configuration ---
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"
LLM_MODEL_ID = "google/flan-t5-small"
INDEX_DIR = "faiss_index"


@st.cache_resource  # heavy objects: load once per process, not on every Streamlit rerun
def _load_qa_chain() -> RetrievalQA:
    """Build and return the RetrievalQA chain (embeddings -> FAISS retriever -> LLM)."""
    # 1. Embeddings (downloaded and run locally via sentence-transformers).
    # FIX: the constructor keyword is `cache_folder`, not `cache_dir`.
    embeddings = HuggingFaceEmbeddings(
        model_name=EMBED_MODEL_ID,
        cache_folder=".hf_cache",
    )

    # 2. Load the FAISS index.
    # FIX: langchain-community now requires an explicit opt-in because the
    # index is pickle-backed; this is safe only because the index is built
    # locally by our own ingestion step, never from untrusted uploads.
    store = FAISS.load_local(
        INDEX_DIR,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    # 3. LLM via a local transformers pipeline.
    # NOTE(review): despite the original comments, `from_model_id` runs the
    # model in-process — it is NOT the hosted HF Inference API.
    llm = HuggingFacePipeline.from_model_id(
        model_id=LLM_MODEL_ID,
        task="text2text-generation",
    )

    # 4. "stuff" chain: retrieved docs are concatenated into a single prompt.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )


qa_chain = _load_qa_chain()

# 5. Streamlit UI
st.title("🦜🔗 RAG App via HF Spaces")
query = st.text_input("Ask a question about your docs:")
if query:
    with st.spinner("Generating answer via HF Space..."):
        # FIX: Chain.run() is deprecated; invoke() returns a dict whose
        # "result" key holds the generated answer.
        answer = qa_chain.invoke({"query": query})["result"]
    st.markdown(f"**Answer:** {answer}")

# NOTE: Deploy this to Hugging Face Spaces for fully-managed hosting.
# Just push this file to your repo on HF and enable Streamlit space.