Spaces:

Talha812
/

RAG-Based-App-QA

Sleeping

App Files Files Community

Talha812 committed on Dec 23, 2024

Commit

ff8d801

verified ·

1 Parent(s): 43089b4

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -18

app.py CHANGED Viewed

@@ -26,9 +26,12 @@ def chunk_text(text, chunk_size=500, chunk_overlap=50):
     return text_splitter.split_text(text)
 # Function to create embeddings and store them in FAISS
-def create_embeddings_and_store(chunks):
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-    vector_db = FAISS.from_texts(chunks, embedding=embeddings)
     return vector_db
 # Function to query the vector database and interact with Groq
@@ -48,27 +51,29 @@ def query_vector_db(query, vector_db):
     return chat_completion.choices[0].message.content
 # Streamlit app
-st.title("RAG-Based Application")
-# Upload PDF
-uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"], accept_multiple_files=True)
-if uploaded_file:
-    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
-        temp_file.write(uploaded_file.read())
-        pdf_path = temp_file.name
-    # Extract text
-    text = extract_text_from_pdf(pdf_path)
-    st.write("PDF Text Extracted Successfully!")
-    # Chunk text
-    chunks = chunk_text(text)
-    st.write("Text Chunked Successfully!")
-    # Generate embeddings and store in FAISS
-    vector_db = create_embeddings_and_store(chunks)
-    st.write("Embeddings Generated and Stored Successfully!")
     # User query input
     user_query = st.text_input("Enter your query:")

     return text_splitter.split_text(text)
 # Function to create embeddings and store them in FAISS
+def create_embeddings_and_store(chunks, vector_db=None):
     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    if vector_db is None:
+        vector_db = FAISS.from_texts(chunks, embedding=embeddings)
+    else:
+        vector_db.add_texts(chunks)
     return vector_db
 # Function to query the vector database and interact with Groq
     return chat_completion.choices[0].message.content
 # Streamlit app
+st.title("RAG-Based Application QA")
+# Upload PDFs
+uploaded_files = st.file_uploader("Upload PDF documents", type=["pdf"], accept_multiple_files=True)
+if uploaded_files:
+    vector_db = None  # Initialize an empty vector DB
+    for uploaded_file in uploaded_files:
+        with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+            temp_file.write(uploaded_file.read())
+            pdf_path = temp_file.name
+        # Extract text
+        text = extract_text_from_pdf(pdf_path)
+        st.write(f"Text extracted from: {uploaded_file.name}")
+        # Chunk text
+        chunks = chunk_text(text)
+        st.write(f"Text chunked from: {uploaded_file.name}")
+        # Generate embeddings and store in FAISS
+        vector_db = create_embeddings_and_store(chunks, vector_db=vector_db)
+        st.write(f"Embeddings generated and stored for: {uploaded_file.name}")
     # User query input
     user_query = st.text_input("Enter your query:")