Spaces:

Anas12-345
/

rag-model

Sleeping

App Files Files Community

Anas12-345 committed on Dec 25, 2024

Commit

9a2dfaa

verified ·

1 Parent(s): 4fc2a80

Create app.py

Browse files

Files changed (1) hide show

app.py +73 -0

app.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import os
+import PyPDF2
+import faiss
+import streamlit as st
+from sentence_transformers import SentenceTransformer
+from groq import Groq
+# Set up Groq client
+client = Groq(api_key="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")
+# Initialize model and FAISS index
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+index = faiss.IndexFlatL2(384)  # Adjust dimension to match the embedding size
+# PDF text extraction
+def extract_text_from_pdf(pdf_file):
+    pdf_reader = PyPDF2.PdfReader(pdf_file)
+    text = ""
+    for page in pdf_reader.pages:
+        text += page.extract_text()
+    return text
+# Text chunking
+def chunk_text(text, chunk_size=500):
+    return [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
+# Embed and store in FAISS
+def embed_and_store(chunks):
+    embeddings = embedding_model.encode(chunks)
+    index.add(embeddings)
+    return embeddings
+# Retrieve relevant chunks
+def retrieve_chunks(query, top_k=5):
+    query_embedding = embedding_model.encode([query])
+    distances, indices = index.search(query_embedding, top_k)
+    return indices
+# Query Groq API
+def query_groq(prompt):
+    chat_completion = client.chat.completions.create(
+        messages=[{"role": "user", "content": prompt}],
+        model="llama3-8b-8192"
+    )
+    return chat_completion.choices[0].message.content
+# Streamlit UI
+def main():
+    st.title("RAG-based PDF QA System")
+    st.sidebar.header("Upload and Interact")
+    uploaded_file = st.sidebar.file_uploader("Upload a PDF", type=["pdf"])
+    if uploaded_file:
+        st.sidebar.success("PDF Uploaded Successfully!")
+        text = extract_text_from_pdf(uploaded_file)
+        chunks = chunk_text(text)
+        embed_and_store(chunks)
+        st.write("PDF content has been processed and stored.")
+    query = st.text_input("Enter your question:")
+    if query:
+        indices = retrieve_chunks(query)
+        relevant_chunks = [chunks[i] for i in indices[0]]
+        prompt = " ".join(relevant_chunks) + f"\n\nQuestion: {query}"
+        answer = query_groq(prompt)
+        st.write("### Answer:")
+        st.write(answer)
+# Entry point: call main() only when this file is executed as the main
+# module, not when it is imported.
+if __name__ == "__main__":
+    main()