Talha812 commited on
Commit
21d68c0
·
verified ·
1 Parent(s): 11ae5d3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from sentence_transformers import SentenceTransformer, util
4
+ from groq import Groq
5
+ from PyPDF2 import PdfReader
6
+
7
# Initialize the retriever and Groq client.
# retriever: Sentence-BERT encoder used to embed both the corpus and queries.
retriever = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# SECURITY: the API key was previously hard-coded in this file. A key committed
# to a public repo is compromised and must be revoked; load it from the
# environment instead (set GROQ_API_KEY before launching the app).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
10
+
11
# Knowledge base (documents) and embeddings.
# NOTE: `documents` and `document_embeddings` are module-level state;
# update_knowledge_base() mutates/rebinds them when a PDF is uploaded.
documents = [
    "Retrieval-Augmented Generation (RAG) is an AI framework that combines the strengths of retrieval-based and generative models.",
    "The main components of a RAG system are the retriever and the generator.",
    "A key benefit of Retrieval-Augmented Generation is that it can produce more accurate responses compared to standalone generative models.",
    "The retrieval process in a RAG system often relies on embedding-based models, like Sentence-BERT or DPR.",
    "Common use cases of RAG include chatbots, customer support systems, and knowledge retrieval for business intelligence."
]
# Pre-compute one embedding per document; kept as a tensor so
# util.semantic_search can score a query against the whole corpus at once.
document_embeddings = retriever.encode(documents, convert_to_tensor=True)
20
+
21
# Function to retrieve the most relevant knowledge-base document
def retrieve(query, top_k=1):
    """Return the best-matching document for *query*, or None on no hits.

    Embeds the query with the shared retriever and runs a semantic search
    over the pre-computed `document_embeddings`.
    """
    embedded_query = retriever.encode(query, convert_to_tensor=True)
    matches = util.semantic_search(embedded_query, document_embeddings, top_k=top_k)[0]
    if not matches:
        return None
    # Highest-scoring hit comes first in the search results.
    return documents[matches[0]['corpus_id']]
27
+
28
# Function to generate an answer with the Groq-hosted LLM
def generate_response(query, context):
    """Ask the LLM to answer *query*, grounded in the retrieved *context*."""
    prompt = f"Context: {context} Question: {query} Answer:"
    completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="gemma2-9b-it",
    )
    # The API returns a list of choices; we only request/use the first one.
    return completion.choices[0].message.content
40
+
41
# Function to handle PDF upload and text extraction
def extract_text_from_pdf(file):
    """Extract plain text from every page of an uploaded PDF.

    Parameters:
        file: a binary file-like object readable by PyPDF2.PdfReader.

    Returns:
        The concatenated text of all pages (empty string for an empty PDF).
    """
    pdf_reader = PdfReader(file)
    # extract_text() returns None for pages with no extractable text
    # (e.g. scanned/image-only pages); the original `text += ...` would
    # raise TypeError there. Treat such pages as empty, and build the
    # result with join instead of quadratic string concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
48
+
49
# Function to fold new PDF content into the knowledge base
def update_knowledge_base(pdf_text):
    """Append *pdf_text* to the corpus and refresh all document embeddings."""
    # Only `document_embeddings` is rebound; `documents` is mutated in place,
    # so it needs no `global` declaration.
    global document_embeddings
    documents.append(pdf_text)
    # Re-encode the full corpus so the new document becomes searchable.
    document_embeddings = retriever.encode(documents, convert_to_tensor=True)
54
+
55
# Streamlit app layout
st.title("RAG-based Question Answering App")
st.write("Upload a PDF, ask questions based on its content, and get answers!")

# Upload PDF file
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file:
    # Streamlit re-runs this whole script on every widget interaction.
    # Without this guard the same PDF would be re-extracted and appended to
    # the knowledge base on each rerun, duplicating documents and paying the
    # full re-embedding cost every time. Track what was already indexed.
    if st.session_state.get("indexed_pdf") != uploaded_file.name:
        pdf_text = extract_text_from_pdf(uploaded_file)
        update_knowledge_base(pdf_text)
        st.session_state["indexed_pdf"] = uploaded_file.name
    st.write("PDF content successfully added to the knowledge base.")

# Question input
question = st.text_input("Enter your question:")
if question:
    retrieved_context = retrieve(question)
    if retrieved_context:
        answer = generate_response(question, retrieved_context)
    else:
        # retrieve() only returns None when the search produced no hits.
        answer = "I have no knowledge about this topic."
    st.write("Answer:", answer)