Mavhas committed on
Commit
826a096
·
verified ·
1 Parent(s): 550b817

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py CHANGED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Dependencies — install from a shell before running this app.
# (`!pip ...` lines are IPython/notebook magics and are a SyntaxError in a
# plain .py file, so they are listed here as a comment instead.)
#
#   pip install streamlit langchain chromadb unstructured faiss-cpu \
#       sentence_transformers PyPDF2 groq langchain-community

import os

import streamlit as st
from groq import Groq
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# SECURITY: the original commit hard-coded a live Groq API key here — never
# embed secrets in source. Supply GROQ_API_KEY via the environment (or
# Streamlit secrets) before launching the app; revoke any previously leaked key.
# e.g.  export GROQ_API_KEY="..."
# Load PDF (with error handling)
def load_pdf(uploaded_file):
    """Load an uploaded PDF and return its pages as LangChain documents.

    ``uploaded_file`` is a Streamlit ``UploadedFile`` (an in-memory,
    file-like object). ``PyPDFLoader`` only accepts a filesystem path, so
    the bytes are spooled to a temporary file first.

    Returns the list of loaded documents, or None on failure (the error is
    surfaced in the UI via ``st.error``).
    """
    import tempfile  # local import: only this helper needs it

    try:
        # BUG FIX: the original passed the UploadedFile object straight to
        # PyPDFLoader, which expects a path string — write it to disk first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getbuffer())
            tmp_path = tmp.name
        try:
            return PyPDFLoader(tmp_path).load()
        finally:
            os.remove(tmp_path)  # always clean up the spooled copy
    except Exception as e:
        st.error(f"Error loading PDF: {e}")
        return None
# Chunking (with error handling)
def chunk_text(documents):
    """Split loaded documents into overlapping chunks ready for embedding.

    Uses a recursive character splitter (1000-char chunks, 200-char overlap).
    Returns the list of chunks, or None if splitting fails (the error is
    surfaced in the UI via ``st.error``).
    """
    try:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        return splitter.split_documents(documents)
    except Exception as e:
        st.error(f"Error chunking text: {e}")
        return None
# Embeddings and Vectorstore (with error handling)
def create_embeddings_and_store(chunks):
    """Embed the chunks and index them in an in-memory FAISS vectorstore.

    Uses the ``all-mpnet-base-v2`` sentence-transformer model.
    Returns the FAISS store, or None on failure (the error is surfaced in
    the UI via ``st.error``).
    """
    try:
        embedder = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
        return FAISS.from_documents(chunks, embedder)
    except Exception as e:
        st.error(f"Error creating embeddings: {e}")
        return None
# Groq interaction (with more robust error handling)
def query_groq(query, db):
    """Answer *query* via RAG: retrieve similar chunks from *db*, ask Groq.

    Returns the model's answer text, or None on any failure (errors are
    surfaced in the UI via ``st.error``).
    """
    try:
        # BUG FIX: validate the key *before* constructing the client — the
        # original built Groq() first, so a missing key raised inside the
        # constructor and the friendly error message was unreachable.
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            st.error("GROQ_API_KEY environment variable is not set.")
            return None

        docs = db.similarity_search(query)
        context = "\n".join(doc.page_content for doc in docs)

        prompt = f"""Use the following context to answer the question: {query}\n\nContext:\n{context}"""

        client = Groq(api_key=api_key)
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",  # Or other suitable open-source model
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        st.error(f"Error querying Groq: {e}")
        return None
# Streamlit app
st.title("RAG Application")

uploaded_file = st.file_uploader("Upload PDF", type="pdf")

if uploaded_file is not None:
    with st.spinner("Processing PDF..."):
        # Each stage returns None on failure (and reports its own error),
        # so the pipeline short-circuits without extra error handling here.
        documents = load_pdf(uploaded_file)
        chunks = chunk_text(documents) if documents else None
        db = create_embeddings_and_store(chunks) if chunks else None
        if db:
            st.success("PDF processed!")

            query = st.text_area("Enter your query")
            if st.button("Submit"):
                if query:
                    with st.spinner("Querying..."):
                        answer = query_groq(query, db)
                        if answer:
                            st.write(answer)
                else:
                    st.warning("Please enter a query.")