Spaces:

jackenmail
/

rag-embedder

Runtime error

App Files Files Community

jackenmail committed 22 days ago

Commit

41ac698

verified ·

1 Parent(s): 07f4b32

Upload 3 files

Browse files

Files changed (3) hide show

app/gradio_space.py +42 -0
app/langchain_rag.py +104 -0
app/rag_app.py +126 -0

app/gradio_space.py ADDED Viewed

	@@ -0,0 +1,42 @@

+# ─────────────────────────────────────────────────────────────
+# app/gradio_space.py
+# Deploy this as your HF Gradio Space
+# 1. Go to https://huggingface.co/spaces → New Space
+# 2. SDK: Gradio, Visibility: Public
+# 3. Upload this file as app.py
+# 4. Upload requirements.txt with: sentence-transformers torch
+# ─────────────────────────────────────────────────────────────
+import gradio as gr
+from sentence_transformers import SentenceTransformer
+import os
+# ── Load your model ───────────────────────────────────────────
+MODEL_NAME = os.getenv("MODEL_NAME", "your-username/rag-embedder")
+print(f"Loading model: {MODEL_NAME}")
+model = SentenceTransformer(MODEL_NAME)
+print("Model ready!")
+# ── Embed function ────────────────────────────────────────────
+def embed(text: str):
+    if not text.strip():
+        return []
+    vector = model.encode(text)
+    return vector.tolist()
+# ── Gradio UI ─────────────────────────────────────────────────
+demo = gr.Interface(
+    fn          = embed,
+    inputs      = gr.Textbox(label="Input Text", placeholder="Enter text to embed..."),
+    outputs     = gr.JSON(label="Embedding Vector"),
+    title       = "RAG Embedder API",
+    description = f"Embedding API powered by {MODEL_NAME}",
+    examples    = [
+        ["What is the refund policy?"],
+        ["How do I reset my password?"],
+        ["When is customer support available?"]
+    ]
+)
+demo.launch()

app/langchain_rag.py ADDED Viewed

	@@ -0,0 +1,104 @@

+# ─────────────────────────────────────────────────────────────
+# app/langchain_rag.py
+# LangChain version of the RAG pipeline
+# ─────────────────────────────────────────────────────────────
+import os
+import sys
+import numpy as np
+from dotenv import load_dotenv
+load_dotenv()
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from gradio_client import Client
+from langchain.embeddings.base      import Embeddings
+from langchain_community.vectorstores import FAISS
+from langchain_community.llms        import HuggingFaceHub
+from langchain.chains                import RetrievalQA
+from langchain.schema                import Document
+# ── Wrap your HF Gradio Space as LangChain Embeddings ────────
+class GradioEmbeddings(Embeddings):
+    """
+    LangChain-compatible wrapper around your
+    HF Gradio Space embedding API.
+    """
+    def __init__(self, space: str = None):
+        self.space  = space or os.getenv("GRADIO_SPACE", "your-username/rag-embedder-app")
+        self.client = Client(self.space)
+        print(f"Connected to Gradio Space: {self.space}")
+    def embed_documents(self, texts: list) -> list:
+        return [self.client.predict(t, api_name="/predict") for t in texts]
+    def embed_query(self, text: str) -> list:
+        return self.client.predict(text, api_name="/predict")
+# ── Load documents ────────────────────────────────────────────
+def load_documents(path: str) -> list:
+    with open(path) as f:
+        lines = [line.strip() for line in f if line.strip()]
+    return [Document(page_content=line) for line in lines]
+# ── Build LangChain RAG chain ─────────────────────────────────
+def build_rag_chain():
+    docs_path = os.getenv("DOCS_PATH", "data/sample_docs.txt")
+    hf_token  = os.getenv("HF_TOKEN",  "")
+    llm_model = os.getenv("LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.1")
+    print("Setting up LangChain RAG pipeline...")
+    # Load docs
+    documents  = load_documents(docs_path)
+    print(f"Loaded {len(documents)} documents")
+    # Embeddings via your HF Gradio Space
+    embeddings = GradioEmbeddings()
+    # Vector store
+    vectorstore = FAISS.from_documents(documents, embeddings)
+    retriever   = vectorstore.as_retriever(search_kwargs={"k": 3})
+    # LLM via HF Hub
+    llm = HuggingFaceHub(
+        repo_id               = llm_model,
+        huggingfacehub_api_token = hf_token,
+        model_kwargs          = {"max_new_tokens": 200, "temperature": 0.3}
+    )
+    # Full RAG chain
+    chain = RetrievalQA.from_chain_type(
+        llm       = llm,
+        retriever = retriever,
+        chain_type= "stuff",
+        return_source_documents = True
+    )
+    print("LangChain RAG chain ready!")
+    return chain
+# ── Run ───────────────────────────────────────────────────────
+if __name__ == "__main__":
+    chain = build_rag_chain()
+    questions = [
+        "What is the refund policy?",
+        "How do I reset my password?",
+        "When can I contact support?"
+    ]
+    print("\n" + "=" * 55)
+    for q in questions:
+        result  = chain({"query": q})
+        answer  = result["result"]
+        sources = [doc.page_content for doc in result["source_documents"]]
+        print(f"Q: {q}")
+        print(f"A: {answer}")
+        print(f"Sources: {sources[:2]}")
+        print("-" * 55)

app/rag_app.py ADDED Viewed

	@@ -0,0 +1,126 @@

+# ─────────────────────────────────────────────────────────────
+# app/rag_app.py
+# Main RAG application — runs locally, calls HF for everything
+# ─────────────────────────────────────────────────────────────
+import os
+import sys
+# Load .env file
+from dotenv import load_dotenv
+load_dotenv()
+# Add project root to path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from utils.embedder  import HFEmbedder
+from utils.retriever import FAISSRetriever
+from utils.generator import HFGenerator
+# ── Config ────────────────────────────────────────────────────
+DOCS_PATH        = os.getenv("DOCS_PATH",        "data/sample_docs.txt")
+FAISS_INDEX_PATH = os.getenv("FAISS_INDEX_PATH", "vector_store/index.faiss")
+TOP_K            = 3
+# ── Load documents ────────────────────────────────────────────
+def load_documents(path: str) -> list:
+    if not os.path.exists(path):
+        raise FileNotFoundError(f"No documents found at {path}")
+    with open(path) as f:
+        docs = [line.strip() for line in f if line.strip()]
+    print(f"Loaded {len(docs)} documents from {path}")
+    return docs
+# ── Build or load index ───────────────────────────────────────
+def setup_retriever(embedder: HFEmbedder, force_rebuild: bool = False) -> FAISSRetriever:
+    retriever = FAISSRetriever(FAISS_INDEX_PATH)
+    if os.path.exists(FAISS_INDEX_PATH) and not force_rebuild:
+        print("Loading existing FAISS index...")
+        retriever.load()
+    else:
+        print("Building new FAISS index...")
+        docs       = load_documents(DOCS_PATH)
+        embeddings = embedder.embed_batch(docs)
+        retriever.build(docs, embeddings)
+        retriever.save()
+    return retriever
+# ── Main RAG function ─────────────────────────────────────────
+class RAGPipeline:
+    def __init__(self, force_rebuild: bool = False):
+        print("\n" + "=" * 55)
+        print("  RAG Pipeline — Your Own HF Model")
+        print("=" * 55)
+        # Initialize components
+        self.embedder  = HFEmbedder()
+        self.retriever = setup_retriever(self.embedder, force_rebuild)
+        self.generator = HFGenerator()
+        print("\nAll components ready!\n")
+    def ask(self, question: str, verbose: bool = True) -> dict:
+        """Ask a question and get an answer grounded in your documents."""
+        if verbose:
+            print(f"Question : {question}")
+        # Step 1: Embed query
+        query_vec = self.embedder.embed(question)
+        # Step 2: Retrieve relevant chunks
+        chunks = self.retriever.search(query_vec, top_k=TOP_K)
+        if verbose:
+            print(f"Retrieved : {[c['text'][:60] for c in chunks]}")
+        # Step 3: Generate answer
+        answer = self.generator.generate(question, chunks)
+        if verbose:
+            print(f"Answer   : {answer}\n")
+        return {
+            "question": question,
+            "answer"  : answer,
+            "sources" : [c["text"] for c in chunks]
+        }
+# ── Run interactively ─────────────────────────────────────────
+if __name__ == "__main__":
+    rag = RAGPipeline()
+    # Demo questions
+    demo_questions = [
+        "What is the refund policy?",
+        "How do I reset my password?",
+        "When can I contact support?",
+        "How long can I return a product?"
+    ]
+    print("=" * 55)
+    print("  Demo Questions")
+    print("=" * 55)
+    for q in demo_questions:
+        result = rag.ask(q)
+        print(f"Q: {result['question']}")
+        print(f"A: {result['answer']}")
+        print("-" * 55)
+    # Interactive mode
+    print("\nInteractive mode — type your question (or 'quit' to exit)")
+    while True:
+        user_input = input("\nYou: ").strip()
+        if user_input.lower() in ["quit", "exit", "q"]:
+            print("Goodbye!")
+            break
+        if user_input:
+            result = rag.ask(user_input)
+            print(f"Bot: {result['answer']}")