NHZ committed on
Commit
b5bd81f
·
verified ·
1 Parent(s): fd7e05e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -17
app.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  import streamlit as st
3
  import PyPDF2
4
  import requests
5
- import numpy as np
6
  import faiss
7
  from groq import Groq
8
 
@@ -36,21 +36,13 @@ def chunk_text(text, max_length=500):
36
  chunks.append(chunk.strip())
37
  return chunks
38
 
39
# Function to compute embeddings using NumPy
def compute_embeddings(chunks, max_chars=300):
    """Embed each text chunk as a fixed-length vector of character ordinals.

    Each chunk is truncated to ``max_chars`` characters, mapped to its
    Unicode code points, and right-padded with zeros to ``max_chars``
    (same scheme as the original truncate-then-pad implementation).

    Args:
        chunks: List of text chunks to embed.
        max_chars: Embedding dimension / truncation length. Defaults to
            300, the value previously hard-coded in two places.

    Returns:
        float32 ndarray of shape ``(len(chunks), max_chars)``. An empty
        input yields a ``(0, max_chars)`` array instead of letting
        ``np.vstack([])`` raise on an empty document.
    """
    if not chunks:
        # np.vstack of an empty sequence raises ValueError; return an
        # empty-but-well-shaped array so downstream indexing code can cope.
        return np.empty((0, max_chars), dtype=np.float32)
    embeddings = np.zeros((len(chunks), max_chars), dtype=np.float32)
    for row, chunk in enumerate(chunks):
        codes = [ord(char) for char in chunk[:max_chars]]
        # Writing into a pre-zeroed row replaces the explicit np.pad call.
        embeddings[row, : len(codes)] = codes
    return embeddings
47
-
48
# Function to create FAISS index
def create_faiss_index(embeddings):
    """Build an exact L2 (Euclidean) FAISS index over precomputed embeddings.

    Args:
        embeddings: float32 array of shape (n_vectors, dimension).

    Returns:
        A populated ``faiss.IndexFlatL2`` ready for nearest-neighbour search.
    """
    n_dims = embeddings.shape[1]
    l2_index = faiss.IndexFlatL2(n_dims)
    l2_index.add(embeddings)
    return l2_index
54
 
55
  # Function to query Groq API
56
  def query_groq(question, model_name="llama-3.3-70b-versatile"):
@@ -70,17 +62,17 @@ def main():
70
  st.write("Document text extracted successfully!")
71
 
72
  st.write("Chunking and embedding text...")
 
73
  chunks = chunk_text(text)
74
- embeddings = compute_embeddings(chunks)
75
- index = create_faiss_index(embeddings)
76
  st.write(f"Created FAISS index with {len(chunks)} chunks.")
77
 
78
  # Query input
79
  question = st.text_input("Ask a question based on the document:")
80
  if question:
81
  st.write("Searching for relevant chunks...")
82
- question_embedding = compute_embeddings([question])[0].reshape(1, -1)
83
- distances, indices = index.search(question_embedding, k=1)
84
  relevant_chunk = chunks[indices[0][0]]
85
 
86
  st.write("Generating answer using Groq API...")
@@ -92,3 +84,4 @@ if __name__ == "__main__":
92
  main()
93
 
94
 
 
 
2
  import streamlit as st
3
  import PyPDF2
4
  import requests
5
+ from sentence_transformers import SentenceTransformer
6
  import faiss
7
  from groq import Groq
8
 
 
36
  chunks.append(chunk.strip())
37
  return chunks
38
 
 
 
 
 
 
 
 
 
 
39
  # Function to create FAISS index
40
def create_faiss_index(chunks, model):
    """Encode text chunks and store them in an exact-L2 FAISS index.

    Args:
        chunks: List of text chunks to index.
        model: Encoder with an ``encode`` method (e.g. a SentenceTransformer)
            returning one embedding vector per chunk.

    Returns:
        Tuple ``(index, chunks)`` — the populated ``faiss.IndexFlatL2`` and
        the chunks list unchanged, so callers can map search hits back to
        their source text.

    Raises:
        ValueError: If ``chunks`` is empty. The previous code crashed here
            with an obscure ``IndexError`` from ``embeddings[0]``.
    """
    if not chunks:
        raise ValueError("create_faiss_index: received no chunks to index")
    embeddings = model.encode(chunks)
    # NOTE(review): assumes encode() returns a 2-D float32 array, which is
    # SentenceTransformer's default — confirm if a different model is used.
    dimension = len(embeddings[0])
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index, chunks
46
 
47
  # Function to query Groq API
48
  def query_groq(question, model_name="llama-3.3-70b-versatile"):
 
62
  st.write("Document text extracted successfully!")
63
 
64
  st.write("Chunking and embedding text...")
65
+ model = SentenceTransformer("all-MiniLM-L6-v2")
66
  chunks = chunk_text(text)
67
+ index, chunks = create_faiss_index(chunks, model)
 
68
  st.write(f"Created FAISS index with {len(chunks)} chunks.")
69
 
70
  # Query input
71
  question = st.text_input("Ask a question based on the document:")
72
  if question:
73
  st.write("Searching for relevant chunks...")
74
+ question_embedding = model.encode([question])
75
+ _, indices = index.search(question_embedding, k=1)
76
  relevant_chunk = chunks[indices[0][0]]
77
 
78
  st.write("Generating answer using Groq API...")
 
84
  main()
85
 
86
 
87
+