jackenmail committed on
Commit
41ac698
·
verified ·
1 Parent(s): 07f4b32

Upload 3 files

Browse files
Files changed (3) hide show
  1. app/gradio_space.py +42 -0
  2. app/langchain_rag.py +104 -0
  3. app/rag_app.py +126 -0
app/gradio_space.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─────────────────────────────────────────────────────────────
2
+ # app/gradio_space.py
3
+ # Deploy this as your HF Gradio Space
4
+ # 1. Go to https://huggingface.co/spaces → New Space
5
+ # 2. SDK: Gradio, Visibility: Public
6
+ # 3. Upload this file as app.py
7
+ # 4. Upload requirements.txt with: sentence-transformers torch
8
+ # ─────────────────────────────────────────────────────────────
9
+
10
+ import gradio as gr
11
+ from sentence_transformers import SentenceTransformer
12
+ import os
13
+
14
+ # ── Load your model ───────────────────────────────────────────
15
# Model id comes from the MODEL_NAME environment variable, with a placeholder
# repo id as the fallback.
MODEL_NAME = os.environ.get("MODEL_NAME", "your-username/rag-embedder")

# The model is loaded once at start-up and reused by every request.
print("Loading model: {}".format(MODEL_NAME))
model = SentenceTransformer(MODEL_NAME)
print("Model ready!")
20
+
21
+ # ── Embed function ────────────────────────────────────────────
22
def embed(text: str):
    """Return the embedding of ``text`` as a plain Python list.

    Args:
        text: The text submitted through the Gradio textbox.

    Returns:
        ``model.encode(text)`` converted with ``.tolist()``, or an empty list
        when ``text`` is ``None``, empty, or whitespace-only.
    """
    # A missing value is treated like blank input instead of failing on
    # ``None.strip()``.
    if text is None or not text.strip():
        return []
    return model.encode(text).tolist()
27
+
28
+ # ── Gradio UI ─────────────────────────────────────────────────
29
# Single-input interface: text goes in, the embedding comes back as JSON.
demo = gr.Interface(
    fn=embed,
    inputs=gr.Textbox(label="Input Text", placeholder="Enter text to embed..."),
    outputs=gr.JSON(label="Embedding Vector"),
    title="RAG Embedder API",
    description=f"Embedding API powered by {MODEL_NAME}",
    examples=[
        ["What is the refund policy?"],
        ["How do I reset my password?"],
        ["When is customer support available?"],
    ],
)

# Launch only when this file is executed as a script, so that importing the
# module (tests, tooling) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()
app/langchain_rag.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─────────────────────────────────────────────────────────────
2
+ # app/langchain_rag.py
3
+ # LangChain version of the RAG pipeline
4
+ # ─────────────────────────────────────────────────────────────
5
+
6
+ import os
7
+ import sys
8
+ import numpy as np
9
+ from dotenv import load_dotenv
10
+ load_dotenv()
11
+
12
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
13
+
14
+ from gradio_client import Client
15
+ from langchain.embeddings.base import Embeddings
16
+ from langchain_community.vectorstores import FAISS
17
+ from langchain_community.llms import HuggingFaceHub
18
+ from langchain.chains import RetrievalQA
19
+ from langchain.schema import Document
20
+
21
+
22
+ # ── Wrap your HF Gradio Space as LangChain Embeddings ────────
23
class GradioEmbeddings(Embeddings):
    """LangChain-compatible wrapper around your HF Gradio Space embedding API.

    Every text is sent to the Space's ``/predict`` endpoint through a
    ``gradio_client.Client``; whatever the endpoint returns is used as the
    embedding.
    """

    def __init__(self, space: str = None):
        """Connect to the embedding Space.

        Args:
            space: Space id to connect to. When omitted, the ``GRADIO_SPACE``
                environment variable is used, then a placeholder id.
        """
        self.space = space or os.getenv("GRADIO_SPACE", "your-username/rag-embedder-app")
        self.client = Client(self.space)
        print(f"Connected to Gradio Space: {self.space}")

    def _embed_one(self, text: str) -> list:
        """Embed one text through the Space and reject an empty result.

        Raises:
            ValueError: If the Space answers with an empty list. The companion
                Space app does this for blank input, and an empty vector would
                otherwise reach the vector store as an invalid embedding.
        """
        vector = self.client.predict(text, api_name="/predict")
        if isinstance(vector, list) and not vector:
            raise ValueError(
                f"Gradio Space {self.space!r} returned an empty embedding for {text!r}"
            )
        return vector

    def embed_documents(self, texts: list) -> list:
        """Return one embedding per entry of ``texts``, in the same order."""
        # One request per text: the endpoint takes a single string.
        return [self._embed_one(t) for t in texts]

    def embed_query(self, text: str) -> list:
        """Return the embedding of a single query string."""
        return self._embed_one(text)
39
+
40
+
41
+ # ── Load documents ────────────────────────────────────────────
42
def load_documents(path: str) -> list:
    """Read ``path`` and wrap every non-blank line in a ``Document``.

    Args:
        path: Text file holding one document per line.

    Returns:
        A list of ``Document`` objects whose ``page_content`` is the line with
        surrounding whitespace stripped; blank lines are skipped.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere, which
    # would garble or reject non-ASCII documents.
    with open(path, encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [Document(page_content=text) for text in stripped if text]
46
+
47
+
48
+ # ── Build LangChain RAG chain ─────────────────────────────────
49
def build_rag_chain():
    """Assemble the LangChain ``RetrievalQA`` chain used by this script.

    Settings are read from the environment: ``DOCS_PATH``, ``HF_TOKEN`` and
    ``LLM_MODEL``, each with the default shown below. The documents are
    indexed in FAISS using ``GradioEmbeddings`` and queried with a top-3
    retriever feeding a ``HuggingFaceHub`` LLM.

    Returns:
        The chain produced by ``RetrievalQA.from_chain_type`` with
        ``return_source_documents=True``.
    """
    env = os.environ
    docs_path = env.get("DOCS_PATH", "data/sample_docs.txt")
    hf_token = env.get("HF_TOKEN", "")
    llm_model = env.get("LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.1")

    print("Setting up LangChain RAG pipeline...")

    documents = load_documents(docs_path)
    print(f"Loaded {len(documents)} documents")

    # Embed through the Gradio Space and index the result in FAISS.
    store = FAISS.from_documents(documents, GradioEmbeddings())
    retriever = store.as_retriever(search_kwargs={"k": 3})

    # Text generation goes through the Hugging Face Hub.
    generation_settings = {"max_new_tokens": 200, "temperature": 0.3}
    hub_llm = HuggingFaceHub(
        repo_id=llm_model,
        huggingfacehub_api_token=hf_token,
        model_kwargs=generation_settings,
    )

    # "stuff" places all retrieved documents into a single prompt.
    rag_chain = RetrievalQA.from_chain_type(
        llm=hub_llm,
        retriever=retriever,
        chain_type="stuff",
        return_source_documents=True,
    )

    print("LangChain RAG chain ready!")
    return rag_chain
84
+
85
+
86
+ # ── Run ───────────────────────────────────────────────────────
87
if __name__ == "__main__":
    chain = build_rag_chain()

    questions = [
        "What is the refund policy?",
        "How do I reset my password?",
        "When can I contact support?",
    ]

    print("\n" + "=" * 55)
    for q in questions:
        # ``invoke`` is the supported entry point; calling the chain object
        # directly is deprecated in current LangChain releases.
        result = chain.invoke({"query": q})
        answer = result["result"]
        sources = [doc.page_content for doc in result["source_documents"]]
        print(f"Q: {q}")
        print(f"A: {answer}")
        # Only the first two sources are shown to keep the output short.
        print(f"Sources: {sources[:2]}")
        print("-" * 55)
app/rag_app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─────────────────────────────────────────────────────────────
2
+ # app/rag_app.py
3
+ # Main RAG application — runs locally, calls HF for everything
4
+ # ─────────────────────────────────────────────────────────────
5
+
6
+ import os
7
+ import sys
8
+
9
+ # Load .env file
10
+ from dotenv import load_dotenv
11
+ load_dotenv()
12
+
13
+ # Add project root to path
14
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
15
+
16
+ from utils.embedder import HFEmbedder
17
+ from utils.retriever import FAISSRetriever
18
+ from utils.generator import HFGenerator
19
+
20
+
21
+ # ── Config ────────────────────────────────────────────────────
22
# Location of the one-document-per-line text file.
DOCS_PATH = os.getenv("DOCS_PATH", "data/sample_docs.txt")
# Where the FAISS index is stored between runs.
FAISS_INDEX_PATH = os.getenv("FAISS_INDEX_PATH", "vector_store/index.faiss")
# Number of chunks retrieved per question. Overridable through the TOP_K
# environment variable like the paths above; falls back to 3 when the variable
# is unset or empty.
TOP_K = int(os.getenv("TOP_K") or 3)
25
+
26
+
27
+ # ── Load documents ────────────────────────────────────────────
28
def load_documents(path: str) -> list:
    """Load one document per non-blank line of ``path``.

    Args:
        path: Text file holding one document per line.

    Returns:
        The lines with surrounding whitespace stripped; blank lines are
        skipped.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    try:
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(path, encoding="utf-8") as f:
            docs = [text for text in (line.strip() for line in f) if text]
    except FileNotFoundError:
        # Opening directly avoids a check-then-open race; the message is the
        # one callers already see.
        raise FileNotFoundError(f"No documents found at {path}") from None
    print(f"Loaded {len(docs)} documents from {path}")
    return docs
35
+
36
+
37
+ # ── Build or load index ───────────────────────────────────────
38
def setup_retriever(embedder: HFEmbedder, force_rebuild: bool = False) -> FAISSRetriever:
    """Return a retriever backed by the index at ``FAISS_INDEX_PATH``.

    An index file already on disk is loaded unless ``force_rebuild`` is set.
    Otherwise the documents at ``DOCS_PATH`` are embedded with ``embedder``,
    indexed, and the index is saved.

    Args:
        embedder: Provides ``embed_batch`` for the document texts.
        force_rebuild: Rebuild even when an index file exists.

    Returns:
        The loaded or freshly built ``FAISSRetriever``.
    """
    retriever = FAISSRetriever(FAISS_INDEX_PATH)

    # Fast path: reuse the index that is already on disk.
    if os.path.exists(FAISS_INDEX_PATH) and not force_rebuild:
        print("Loading existing FAISS index...")
        retriever.load()
        return retriever

    print("Building new FAISS index...")
    texts = load_documents(DOCS_PATH)
    vectors = embedder.embed_batch(texts)
    retriever.build(texts, vectors)
    retriever.save()
    return retriever
52
+
53
+
54
+ # ── Main RAG function ─────────────────────────────────────────
55
class RAGPipeline:
    """End-to-end question answering: embed, retrieve, generate."""

    def __init__(self, force_rebuild: bool = False):
        """Create the embedder, retriever and generator.

        Args:
            force_rebuild: Passed to ``setup_retriever``; when true the FAISS
                index is rebuilt even if one exists on disk.
        """
        print("\n" + "=" * 55)
        # The dash in the banner was mis-encoded ("β€”"); restored to an em dash.
        print(" RAG Pipeline — Your Own HF Model")
        print("=" * 55)

        # Initialize components
        self.embedder = HFEmbedder()
        self.retriever = setup_retriever(self.embedder, force_rebuild)
        self.generator = HFGenerator()
        print("\nAll components ready!\n")

    def ask(self, question: str, verbose: bool = True) -> dict:
        """Ask a question and get an answer grounded in your documents.

        Args:
            question: The user's question.
            verbose: When true, print the question, a 60-character preview of
                each retrieved chunk, and the answer.

        Returns:
            A dict with the keys ``"question"``, ``"answer"`` and
            ``"sources"`` (the ``"text"`` of every retrieved chunk).
        """
        if verbose:
            print(f"Question : {question}")

        # Step 1: Embed query
        query_vec = self.embedder.embed(question)

        # Step 2: Retrieve relevant chunks; each chunk is indexed by "text" below
        chunks = self.retriever.search(query_vec, top_k=TOP_K)

        if verbose:
            print(f"Retrieved : {[c['text'][:60] for c in chunks]}")

        # Step 3: Generate answer
        answer = self.generator.generate(question, chunks)

        if verbose:
            print(f"Answer : {answer}\n")

        return {
            "question": question,
            "answer": answer,
            "sources": [c["text"] for c in chunks],
        }
93
+
94
+
95
+ # ── Run interactively ─────────────────────────────────────────
96
if __name__ == "__main__":
    rag = RAGPipeline()

    # Demo questions
    demo_questions = [
        "What is the refund policy?",
        "How do I reset my password?",
        "When can I contact support?",
        "How long can I return a product?",
    ]

    print("=" * 55)
    print(" Demo Questions")
    print("=" * 55)

    for q in demo_questions:
        result = rag.ask(q)
        print(f"Q: {result['question']}")
        print(f"A: {result['answer']}")
        print("-" * 55)

    # Interactive mode (the dash in this message was mis-encoded; restored)
    print("\nInteractive mode — type your question (or 'quit' to exit)")
    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D, Ctrl-C or a closed stdin ends the session like "quit"
            # instead of printing a traceback.
            print("\nGoodbye!")
            break
        if user_input.lower() in ("quit", "exit", "q"):
            print("Goodbye!")
            break
        if user_input:
            result = rag.ask(user_input)
            print(f"Bot: {result['answer']}")