# First_Aid_Kit / app.py
import os

import faiss
import numpy as np
import PyPDF2
import requests
import streamlit as st
from groq import Groq
# Initialize Groq client using the secret environment variable
# NOTE(review): os.getenv returns None when GROQ_API_KEY is unset; the Groq
# client will then fail on the first API call — consider failing fast here.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# Function to download and read PDF content
def extract_text_from_google_drive():
    """Download the hard-coded Google Drive PDF and return its full text.

    Saves the download to ``document.pdf`` in the working directory, then
    extracts and concatenates the text of every page, separated by spaces.

    Returns:
        str: the document text (may be empty for image-only PDFs).

    Raises:
        requests.HTTPError: if the download does not return a 2xx status.
    """
    link = "https://drive.google.com/uc?id=1XvqA1OIssRs2gbmOtKFKj-02yQ5X2yg0"
    response = requests.get(link)
    # Fail loudly rather than handing an HTML error page to the PDF parser.
    response.raise_for_status()
    with open("document.pdf", "wb") as file:
        file.write(response.content)
    with open("document.pdf", "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() can return None for image-only pages — guard with "".
        text = " ".join([page.extract_text() or "" for page in reader.pages])
    return text
# Function to chunk text
def chunk_text(text, max_length=500):
    """Split *text* into chunks of roughly ``max_length`` characters.

    The text is split on sentence boundaries (". ") and sentences are packed
    greedily into chunks; a sentence longer than ``max_length`` still becomes
    its own (oversized) chunk rather than being cut mid-sentence.

    Args:
        text: the document text to split.
        max_length: soft upper bound on chunk size in characters.

    Returns:
        list[str]: non-empty chunks; ``[]`` for empty/whitespace-only input.
    """
    # Drop empty fragments so blank input doesn't produce a bogus "." chunk.
    sentences = [s for s in text.split(". ") if s.strip()]
    chunks = []
    chunk = ""
    for sentence in sentences:
        if len(chunk) + len(sentence) <= max_length:
            chunk += sentence + ". "
        else:
            chunks.append(chunk.strip())
            chunk = sentence + ". "
    if chunk:
        chunks.append(chunk.strip())
    return chunks
# Function to compute simple embeddings
def compute_embeddings(chunks):
    """Map each text chunk to a fixed-size 300-dim vector of character codes.

    Only the first 300 characters of a chunk are used (ord() of each char);
    shorter chunks are zero-padded so every vector has exactly length 300.

    Args:
        chunks: list of strings to embed.

    Returns:
        list[list[int]]: one 300-element vector per input chunk.
    """
    dim = 300
    vectors = []
    for text in chunks:
        codes = [ord(ch) for ch in text[:dim]]
        codes.extend([0] * (dim - len(codes)))  # zero-pad to the fixed size
        vectors.append(codes)
    return vectors
# Function to create FAISS index
def create_faiss_index(embeddings):
    """Build an exact L2 FAISS index from a list of equal-length vectors.

    Args:
        embeddings: non-empty list of numeric vectors, all the same length.

    Returns:
        faiss.IndexFlatL2 populated with the vectors.
    """
    dimension = len(embeddings[0])
    index = faiss.IndexFlatL2(dimension)
    # Bug fix: faiss.FloatVectorArray does not exist in the faiss Python API;
    # Index.add expects a contiguous (n, dim) float32 numpy matrix.
    index.add(np.array(embeddings, dtype="float32"))
    return index
# Function to query Groq API
def query_groq(question, model_name="llama-3.3-70b-versatile"):
    """Send *question* as a single user message to the Groq chat API.

    Args:
        question: prompt text for the model.
        model_name: Groq model identifier to use.

    Returns:
        str: the assistant's reply text.
    """
    messages = [{"role": "user", "content": question}]
    response = client.chat.completions.create(messages=messages, model=model_name)
    return response.choices[0].message.content
# Streamlit app
def main():
    """Streamlit entry point: index the Drive PDF and answer user questions.

    Pipeline: download/extract text -> chunk -> char-code embeddings ->
    FAISS index -> retrieve the single best chunk for the question ->
    ask the Groq LLM with question + retrieved context.
    """
    st.title("RAG-based Application with Groq API")
    st.subheader("Query the document stored on Google Drive")
    st.write("Extracting text from the document...")
    # NOTE(review): Streamlit reruns the whole script on every interaction,
    # so the PDF is re-downloaded and re-indexed per keystroke/submit;
    # st.cache_data would avoid that.
    text = extract_text_from_google_drive()
    st.write("Document text extracted successfully!")
    st.write("Chunking and embedding text...")
    chunks = chunk_text(text)
    embeddings = compute_embeddings(chunks)
    index = create_faiss_index(embeddings)
    st.write(f"Created FAISS index with {len(chunks)} chunks.")

    # Query input
    question = st.text_input("Ask a question based on the document:")
    if question:
        st.write("Searching for relevant chunks...")
        question_embedding = compute_embeddings([question])[0]
        # Bug fix: faiss.FloatVectorArray does not exist; search expects a
        # (1, dim) float32 numpy matrix.
        query = np.array([question_embedding], dtype="float32")
        _, indices = index.search(query, 1)
        relevant_chunk = chunks[indices[0][0]]
        st.write("Generating answer using Groq API...")
        # Bug fix: the original passed only the retrieved chunk to the LLM and
        # discarded the user's question — send both as a grounded prompt.
        prompt = (
            "Answer the question using only the context below.\n\n"
            f"Context:\n{relevant_chunk}\n\n"
            f"Question: {question}"
        )
        answer = query_groq(prompt)
        st.write("### Answer:")
        st.write(answer)


if __name__ == "__main__":
    main()