Mavhas committed on
Commit
826a096
·
verified ·
1 Parent(s): 550b817

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py CHANGED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Dependencies — install from a shell before running this app.
# (`!pip ...` lines are IPython/notebook magics and are a SyntaxError in a
# plain .py file, so they are listed here as a comment instead.)
#
#   pip install streamlit langchain chromadb unstructured faiss-cpu \
#       sentence_transformers PyPDF2 groq langchain-community

import os

import streamlit as st
from groq import Groq
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# SECURITY: the original commit hard-coded a live Groq API key here — never
# embed secrets in source. Supply GROQ_API_KEY via the environment (or
# Streamlit secrets) before launching the app; revoke any previously leaked key.
# e.g.  export GROQ_API_KEY="..."
# Load PDF (with error handling)
def load_pdf(uploaded_file):
    """Load an uploaded PDF and return its pages as LangChain documents.

    ``uploaded_file`` is a Streamlit ``UploadedFile`` (an in-memory,
    file-like object). ``PyPDFLoader`` only accepts a filesystem path, so
    the bytes are spooled to a temporary file first.

    Returns the list of loaded documents, or None on failure (the error is
    surfaced in the UI via ``st.error``).
    """
    import tempfile  # local import: only this helper needs it

    try:
        # BUG FIX: the original passed the UploadedFile object straight to
        # PyPDFLoader, which expects a path string — write it to disk first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
            tmp.write(uploaded_file.getbuffer())
            tmp_path = tmp.name
        try:
            return PyPDFLoader(tmp_path).load()
        finally:
            os.remove(tmp_path)  # always clean up the spooled copy
    except Exception as e:
        st.error(f"Error loading PDF: {e}")
        return None
# Chunking (with error handling)
def chunk_text(documents):
    """Split loaded documents into overlapping chunks ready for embedding.

    Uses a recursive character splitter (1000-char chunks, 200-char overlap).
    Returns the list of chunks, or None if splitting fails (the error is
    surfaced in the UI via ``st.error``).
    """
    try:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        return splitter.split_documents(documents)
    except Exception as e:
        st.error(f"Error chunking text: {e}")
        return None
# Embeddings and Vectorstore (with error handling)
def create_embeddings_and_store(chunks):
    """Embed the chunks and index them in an in-memory FAISS vectorstore.

    Uses the ``all-mpnet-base-v2`` sentence-transformer model.
    Returns the FAISS store, or None on failure (the error is surfaced in
    the UI via ``st.error``).
    """
    try:
        embedder = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
        return FAISS.from_documents(chunks, embedder)
    except Exception as e:
        st.error(f"Error creating embeddings: {e}")
        return None
# Groq interaction (with more robust error handling)
def query_groq(query, db):
    """Answer *query* via RAG: retrieve similar chunks from *db*, ask Groq.

    Returns the model's answer text, or None on any failure (errors are
    surfaced in the UI via ``st.error``).
    """
    try:
        # BUG FIX: validate the key *before* constructing the client — the
        # original built Groq() first, so a missing key raised inside the
        # constructor and the friendly error message was unreachable.
        api_key = os.environ.get("GROQ_API_KEY")
        if not api_key:
            st.error("GROQ_API_KEY environment variable is not set.")
            return None

        docs = db.similarity_search(query)
        context = "\n".join(doc.page_content for doc in docs)

        prompt = f"""Use the following context to answer the question: {query}\n\nContext:\n{context}"""

        client = Groq(api_key=api_key)
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",  # Or other suitable open-source model
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        st.error(f"Error querying Groq: {e}")
        return None
# Streamlit app
st.title("RAG Application")

uploaded_file = st.file_uploader("Upload PDF", type="pdf")

if uploaded_file is not None:
    with st.spinner("Processing PDF..."):
        # Each stage returns None on failure (and reports its own error),
        # so the pipeline short-circuits without extra error handling here.
        documents = load_pdf(uploaded_file)
        chunks = chunk_text(documents) if documents else None
        db = create_embeddings_and_store(chunks) if chunks else None
        if db:
            st.success("PDF processed!")

            query = st.text_area("Enter your query")
            if st.button("Submit"):
                if query:
                    with st.spinner("Querying..."):
                        answer = query_groq(query, db)
                        if answer:
                            st.write(answer)
                else:
                    st.warning("Please enter a query.")