Muqadas-13 committed on
Commit
d2ab873
·
verified ·
1 Parent(s): af7bae4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -7,14 +7,17 @@ import numpy as np
7
  from sentence_transformers import SentenceTransformer
8
  from groq import Groq
9
 
10
- # ✅ Get Groq API key
11
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
12
 
 
13
  embed_model = SentenceTransformer("all-MiniLM-L6-v2")
 
 
14
  INDEX = faiss.IndexFlatL2(384)
15
  stored_chunks = []
16
 
17
- # UI Styling
18
  st.markdown("""
19
  <style>
20
  .main-title {
@@ -39,6 +42,7 @@ st.markdown("""
39
 
40
  st.markdown('<div class="main-title">📄 Smart RAG Document QA Assistant</div>', unsafe_allow_html=True)
41
 
 
42
  def extract_text(file):
43
  if file.type == "application/pdf":
44
  reader = PdfReader(file)
@@ -50,20 +54,24 @@ def extract_text(file):
50
  return file.read().decode("utf-8")
51
  return ""
52
 
 
53
  def chunk_text(text, chunk_size=200):
54
  words = text.split()
55
  return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
56
 
 
57
  def store_embeddings(chunks):
58
  vectors = embed_model.encode(chunks)
59
  INDEX.add(np.array(vectors, dtype=np.float32))
60
  stored_chunks.extend(chunks)
61
 
 
62
  def retrieve_similar_chunks(query, top_k=3):
63
  query_vector = embed_model.encode([query])
64
  distances, indices = INDEX.search(np.array(query_vector, dtype=np.float32), top_k)
65
  return [stored_chunks[i] for i in indices[0]]
66
 
 
67
  def get_llm_answer(query, context):
68
  prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
69
  chat_completion = client.chat.completions.create(
@@ -72,9 +80,11 @@ def get_llm_answer(query, context):
72
  )
73
  return chat_completion.choices[0].message.content
74
 
 
75
  uploaded_file = st.file_uploader("📁 Upload your document", type=["pdf", "docx", "txt"])
76
  query = st.text_input("💬 Ask a question about your document")
77
 
 
78
  if uploaded_file:
79
  with st.spinner("Processing file..."):
80
  text = extract_text(uploaded_file)
@@ -82,10 +92,12 @@ if uploaded_file:
82
  store_embeddings(chunks)
83
  st.success("✅ Document uploaded and indexed!")
84
 
 
85
  if st.button("🧠 Get Answer") and query:
86
  with st.spinner("Thinking..."):
87
  context = "\n\n".join(retrieve_similar_chunks(query))
88
  answer = get_llm_answer(query, context)
89
  st.markdown(f'<div class="card"><b>Answer:</b><br>{answer}</div>', unsafe_allow_html=True)
90
 
 
91
  st.markdown("<br><center style='color: grey;'>Built by Muqadas with ❀️ using Streamlit + Groq + FAISS</center>", unsafe_allow_html=True)
 
7
  from sentence_transformers import SentenceTransformer
8
  from groq import Groq
9
 
10
+ # ✅ Initialize Groq client with API key
11
  client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
12
 
13
+ # ✅ Load embedding model
14
  embed_model = SentenceTransformer("all-MiniLM-L6-v2")
15
+
16
+ # ✅ Initialize FAISS index
17
  INDEX = faiss.IndexFlatL2(384)
18
  stored_chunks = []
19
 
20
+ # ✅ UI Styling
21
  st.markdown("""
22
  <style>
23
  .main-title {
 
42
 
43
  st.markdown('<div class="main-title">📄 Smart RAG Document QA Assistant</div>', unsafe_allow_html=True)
44
 
45
+ # ✅ Extract text from various document types
46
  def extract_text(file):
47
  if file.type == "application/pdf":
48
  reader = PdfReader(file)
 
54
  return file.read().decode("utf-8")
55
  return ""
56
 
57
+ # ✅ Split text into chunks
58
  def chunk_text(text, chunk_size=200):
59
  words = text.split()
60
  return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]
61
 
62
+ # ✅ Store vector embeddings in FAISS
63
  def store_embeddings(chunks):
64
  vectors = embed_model.encode(chunks)
65
  INDEX.add(np.array(vectors, dtype=np.float32))
66
  stored_chunks.extend(chunks)
67
 
68
+ # ✅ Retrieve similar chunks from FAISS
69
  def retrieve_similar_chunks(query, top_k=3):
70
  query_vector = embed_model.encode([query])
71
  distances, indices = INDEX.search(np.array(query_vector, dtype=np.float32), top_k)
72
  return [stored_chunks[i] for i in indices[0]]
73
 
74
+ # ✅ Ask Groq LLaMA3 using context
75
  def get_llm_answer(query, context):
76
  prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}"
77
  chat_completion = client.chat.completions.create(
 
80
  )
81
  return chat_completion.choices[0].message.content
82
 
83
+ # ✅ Streamlit UI components
84
  uploaded_file = st.file_uploader("📁 Upload your document", type=["pdf", "docx", "txt"])
85
  query = st.text_input("💬 Ask a question about your document")
86
 
87
+ # ✅ Process document
88
  if uploaded_file:
89
  with st.spinner("Processing file..."):
90
  text = extract_text(uploaded_file)
 
92
  store_embeddings(chunks)
93
  st.success("✅ Document uploaded and indexed!")
94
 
95
+ # ✅ Ask question and get answer
96
  if st.button("🧠 Get Answer") and query:
97
  with st.spinner("Thinking..."):
98
  context = "\n\n".join(retrieve_similar_chunks(query))
99
  answer = get_llm_answer(query, context)
100
  st.markdown(f'<div class="card"><b>Answer:</b><br>{answer}</div>', unsafe_allow_html=True)
101
 
102
+ # ✅ Footer
103
  st.markdown("<br><center style='color: grey;'>Built by Muqadas with ❀️ using Streamlit + Groq + FAISS</center>", unsafe_allow_html=True)