Update app.py
app.py CHANGED
```diff
@@ -2,8 +2,6 @@ import os
 from pathlib import Path
 import gradio as gr
 
-from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import FAISS
 from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_core.prompts import PromptTemplate
@@ -12,49 +10,31 @@ from langchain.llms import HuggingFacePipeline
 from transformers import pipeline
 
 # Constants
-
-DB_FAISS_PATH = "vectorstore/db_faiss"
+DB_FAISS_PATH = "vectorstore/db_faiss" # Pre-generated FAISS directory
 EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
-MODEL_NAME = "MBZUAI/LaMini-Flan-T5-783M" #
-CHUNK_SIZE = 1500
-CHUNK_OVERLAP = 150
+MODEL_NAME = "MBZUAI/LaMini-Flan-T5-783M" # Lightweight CPU-friendly model
 
-# Step 1: Load
-def load_documents():
-    loader = DirectoryLoader(DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader)
-    documents = loader.load()
-    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
-    return splitter.split_documents(documents)
-
-# Step 2: Create vectorstore if not exists
-def ensure_vector_store():
-    if not Path(DB_FAISS_PATH).exists():
-        print("Creating new vector store...")
-        documents = load_documents()
-        embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
-        db = FAISS.from_documents(documents, embeddings)
-        db.save_local(DB_FAISS_PATH)
-    else:
-        print("Loading existing vector store...")
+# Step 1: Load FAISS vectorstore (already created offline)
+def load_vector_store():
     embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
     return FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
 
-# Step
+# Step 2: Load lightweight HuggingFace model (no token needed)
 def load_llm():
     pipe = pipeline("text2text-generation", model=MODEL_NAME)
     return HuggingFacePipeline(pipeline=pipe)
 
-# Step
+# Step 3: Setup QA chain
 def setup_chain():
     prompt_template = """
     Use the following context to answer the question.
     If the answer is not in the context, just say you don't know.
-
+
     Context: {context}
     Question: {question}
     """
     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-    retriever = ensure_vector_store().as_retriever(search_kwargs={"k": 3})
+    retriever = load_vector_store().as_retriever(search_kwargs={"k": 3})
     llm = load_llm()
     return RetrievalQA.from_chain_type(
         llm=llm,
@@ -66,12 +46,17 @@ def setup_chain():
 
 qa_chain = setup_chain()
 
-# Step
+# Step 4: Gradio Interface
 def rag_bot(query):
     result = qa_chain.invoke({"query": query})
     return result["result"]
 
-
-
-
+# Step 5: Launch Interface
+demo = gr.Interface(
+    fn=rag_bot,
+    inputs="text",
+    outputs="text",
+    title="TextileVision: AI Chatbot",
+    description="Ask queries about loom speed, yarn mixing, knitting prediction, and textile operations."
+)
 demo.launch()
```
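The new `load_vector_store` assumes `vectorstore/db_faiss` already exists in the repository, so the index now has to be built once, offline. Below is a minimal build script sketched from the code this commit deletes; the `DATA_PATH` value is an assumption, since the constant's definition is not visible in the diff.

```python
# build_index.py - one-off, offline FAISS index builder. A sketch reconstructed
# from the deleted load_documents/ensure_vector_store code; DATA_PATH is assumed.
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

DATA_PATH = "data"  # assumed location of the source PDFs
DB_FAISS_PATH = "vectorstore/db_faiss"  # must match app.py
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
CHUNK_SIZE = 1500
CHUNK_OVERLAP = 150

# Load every PDF under DATA_PATH and split it into overlapping chunks.
loader = DirectoryLoader(DATA_PATH, glob="*.pdf", loader_cls=PyPDFLoader)
chunks = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
).split_documents(loader.load())

# Embed the chunks and persist the index where app.py expects it.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
FAISS.from_documents(chunks, embeddings).save_local(DB_FAISS_PATH)
```

The embedding model here must stay identical to `EMBEDDING_MODEL_NAME` in `app.py`, and the generated `vectorstore/db_faiss` directory has to be committed to the Space alongside the code. Note that `FAISS.load_local` unpickles the saved docstore, which is why `app.py` passes `allow_dangerous_deserialization=True`; that flag is only safe for index files you produced yourself.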