Mavhas committed on
Commit
db2f74d
·
verified ·
1 Parent(s): 95a730b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import streamlit as st
3
+ from langchain.document_loaders import PyPDFLoader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings import SentenceTransformerEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ import os
8
+ from groq import Groq
9
+
10
+ # Load PDF (with error handling)
11
def load_pdf(uploaded_file):
    """Load a PDF into LangChain documents.

    ``PyPDFLoader`` is constructed from a filesystem path, while the app passes
    the in-memory object returned by ``st.file_uploader``. The upload is
    therefore written to a temporary ``.pdf`` file first and loaded from there.
    A plain path (``str`` / ``os.PathLike``) is still accepted and used as-is.

    Args:
        uploaded_file: Either a path to a PDF, or a file-like object exposing
            ``getvalue()`` or ``read()`` that yields the PDF bytes.

    Returns:
        The list produced by ``PyPDFLoader(...).load()``, or ``None`` if
        anything goes wrong (the error is shown with ``st.error``).
    """
    import tempfile  # local import: only this function needs it

    tmp_path = None
    try:
        if isinstance(uploaded_file, (str, os.PathLike)):
            pdf_path = os.fspath(uploaded_file)
        else:
            # getvalue() returns the whole buffer regardless of the current
            # read position; fall back to read() for generic file objects.
            if hasattr(uploaded_file, "getvalue"):
                data = uploaded_file.getvalue()
            else:
                data = uploaded_file.read()
            # delete=False so the file can be reopened by path after the
            # handle is closed; it is removed in the finally clause below.
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
                tmp.write(data)
                tmp_path = tmp.name
            pdf_path = tmp_path

        loader = PyPDFLoader(pdf_path)
        return loader.load()
    except Exception as e:
        st.error(f"Error loading PDF: {e}")
        return None
    finally:
        # Best-effort cleanup of the spooled copy; a leftover temp file must
        # not mask the real result or error.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
19
+
20
+ # Chunking (with error handling)
21
def chunk_text(documents):
    """Split loaded documents into overlapping chunks.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=1000`` and ``chunk_overlap=200``.

    Args:
        documents: Documents to pass to ``split_documents``.

    Returns:
        The chunks returned by the splitter, or ``None`` if splitting raised
        (the error is shown with ``st.error``).
    """
    try:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        return splitter.split_documents(documents)
    except Exception as exc:
        st.error(f"Error chunking text: {exc}")
    return None
29
+
30
+ # Embeddings and Vectorstore (with error handling)
31
def create_embeddings_and_store(chunks):
    """Embed the chunks and index them in a FAISS vector store.

    Embeddings come from ``SentenceTransformerEmbeddings`` with the
    ``all-mpnet-base-v2`` model; another model name could be substituted.

    Args:
        chunks: Document chunks to hand to ``FAISS.from_documents``.

    Returns:
        The FAISS store, or ``None`` if embedding or indexing raised (the
        error is shown with ``st.error``).
    """
    try:
        embedder = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
        return FAISS.from_documents(chunks, embedder)
    except Exception as exc:
        st.error(f"Error creating embeddings: {exc}")
    return None
39
+
40
+ # Groq interaction (with more robust error handling)
41
def query_groq(query, db):
    """Answer ``query`` with a Groq chat model using context retrieved from ``db``.

    The most similar chunks are fetched with ``db.similarity_search(query)``,
    their ``page_content`` is joined with newlines, and the result is sent
    together with the question as a single user message.

    Args:
        query: The user's question.
        db: Vector store exposing ``similarity_search(query)``; each result
            must have a ``page_content`` attribute.

    Returns:
        The content of the first completion choice, or ``None`` when the API
        key is missing or any step raises (the problem is shown with
        ``st.error``).
    """
    # Validate the key before doing any work. Checking ``client.api_key``
    # after building the client comes too late if the client rejects a
    # missing key at construction time (NOTE(review): believed to be the
    # Groq SDK behavior - confirm), and it also wasted a similarity search.
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        st.error("GROQ_API_KEY environment variable is not set. Set it as a Space secret.")
        return None

    try:
        docs = db.similarity_search(query)  # Similarity search
        context = "\n".join(doc.page_content for doc in docs)

        client = Groq(api_key=api_key)

        prompt = f"""Use the following context to answer the question: {query}\n\nContext:\n{context}"""

        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama-3.3-70b-versatile",  # Or other suitable model available on Groq
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        st.error(f"Error querying Groq: {e}")
        return None
61
+
62
+ # Streamlit app
63
st.title("RAG Application")

uploaded_file = st.file_uploader("Upload PDF", type="pdf")

if uploaded_file is not None:
    # Streamlit re-executes this script on every widget interaction (editing
    # the query, pressing Submit). Keep the vector store in session state so
    # the PDF is parsed and embedded once per uploaded file instead of on
    # every rerun.
    file_key = (uploaded_file.name, uploaded_file.size)

    if st.session_state.get("rag_file_key") == file_key:
        db = st.session_state.get("rag_db")
    else:
        db = None
        with st.spinner("Processing PDF..."):
            documents = load_pdf(uploaded_file)
            if documents:  # Check if PDF loaded successfully
                chunks = chunk_text(documents)
                if chunks:  # Check if chunks were created successfully
                    db = create_embeddings_and_store(chunks)
        # Cache successful results only, so a failed run is retried (and its
        # error shown again) on the next rerun.
        if db is not None:
            st.session_state["rag_db"] = db
            st.session_state["rag_file_key"] = file_key

    if db is not None:  # Check if embeddings were created successfully
        st.success("PDF processed!")

        query = st.text_area("Enter your query")
        if st.button("Submit"):
            # Treat whitespace-only input as empty rather than sending it on.
            if query and query.strip():
                with st.spinner("Querying..."):
                    answer = query_groq(query, db)
                if answer:  # Check if query was successful
                    st.write(answer)
            else:
                st.warning("Please enter a query.")