|
|
import streamlit as st |
|
|
import PyPDF2 |
|
|
from groq import Groq |
|
|
import os |
|
|
|
|
|
|
|
|
# Configure the Groq API key.
# NOTE(review): the original hard-coded a placeholder key here — real secrets
# must never be committed to source control. Supply GROQ_API_KEY via the
# environment instead; setdefault keeps any externally provided key intact.
os.environ.setdefault("GROQ_API_KEY", "myKey")

# Single shared Groq client used by chatbot_response().
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
|
|
|
|
|
|
|
def extract_pdf_content(pdf_file):
    """Extract plain text from every page of an uploaded PDF.

    Args:
        pdf_file: A file-like object (e.g. Streamlit's UploadedFile) that
            PyPDF2.PdfReader can read.

    Returns:
        str: Concatenated text of all pages. Pages with no extractable text
        contribute an empty string instead of crashing.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    # extract_text() may return None for image-only or empty pages; the
    # original `pdf_text += page.extract_text()` raised TypeError then.
    # join() also avoids the quadratic += string-building pattern.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
|
|
|
|
|
|
|
def chunk_text(text, chunk_size=1000, overlap=200):
    """Split text into overlapping, fixed-size chunks.

    Args:
        text (str): The text to split.
        chunk_size (int): Maximum length of each chunk; must be positive.
        overlap (int): Characters shared between consecutive chunks; must be
            smaller than chunk_size.

    Returns:
        list[str]: Chunks covering the whole text ([] for empty input).

    Raises:
        ValueError: If chunk_size <= 0 or overlap >= chunk_size — parameters
            that made the original loop spin forever (start never advanced).
    """
    step = chunk_size - overlap
    if chunk_size <= 0 or step <= 0:
        raise ValueError("chunk_size must be positive and greater than overlap")
    # Each chunk starts `step` characters after the previous one, so adjacent
    # chunks share exactly `overlap` characters (except a short final chunk).
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
|
|
|
|
|
|
|
|
def find_relevant_chunks(chunks, query, num_chunks=3):
    """Select the chunks most relevant to a query.

    The original implementation ignored the query entirely and returned the
    first num_chunks chunks. Relevance is now scored by how many of a chunk's
    words appear in the (lower-cased) query; ties — including the case where
    nothing matches — fall back to document order, preserving the old
    first-N behavior for unmatched queries.

    Args:
        chunks (list[str]): Candidate text chunks.
        query (str): The user's question.
        num_chunks (int): Maximum number of chunks to return.

    Returns:
        list[str]: Up to num_chunks chunks, best matches first.
    """
    query_words = set(query.lower().split())
    if not query_words:
        return chunks[:num_chunks]

    def score(chunk):
        # Total occurrences of query words among the chunk's words.
        return sum(word in query_words for word in chunk.lower().split())

    # sorted() is stable, so equal scores keep original chunk order even
    # with reverse=True.
    ranked = sorted(chunks, key=score, reverse=True)
    return ranked[:num_chunks]
|
|
|
|
|
|
|
|
def chatbot_response(user_query, chunks):
    """Answer a user question using the most relevant PDF chunks as context.

    Args:
        user_query (str): The question typed by the user.
        chunks (list[str]): Text chunks extracted from the uploaded PDF.

    Returns:
        str: The text of the model's reply.
    """
    # Narrow the context to the chunks that best match the question, then
    # stitch them into a single prompt for the model.
    picked = find_relevant_chunks(chunks, user_query)
    combined_context = "\n\n".join(picked)
    prompt = f"PDF Content:\n{combined_context}\n\nUser Query: {user_query}"

    request_messages = [{"role": "user", "content": prompt}]
    reply = client.chat.completions.create(
        messages=request_messages,
        model="llama-3.3-70b-versatile",
    )
    return reply.choices[0].message.content
|
|
|
|
|
|
|
|
# --- Streamlit UI -----------------------------------------------------------
st.title("PDF Query Chatbot")
st.write("Upload a PDF and ask questions based on its content.")

pdf_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if pdf_file:
    with st.spinner("Extracting content..."):
        pdf_content = extract_pdf_content(pdf_file)
        chunks = chunk_text(pdf_content)
    st.success("PDF content loaded successfully!")

    # The question box is only shown once a PDF is loaded: asking before an
    # upload would otherwise hit a NameError because `chunks` is undefined.
    user_query = st.text_input("Ask a question about the PDF:")

    if user_query:
        with st.spinner("Fetching response..."):
            response = chatbot_response(user_query, chunks)
        st.write(f"**Chatbot Response:** {response}")
|
|
|