# hussainraza - "Update app.py" (commit 1ff52e0, verified)
import os
import streamlit as st
from groq import Groq
import PyPDF2
# Initialize Groq client
# The key is looked up in the process environment; when GROQ_API_KEY is not
# set the lookup yields None and that value is handed to the client as-is.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# Function to extract text from uploaded PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of ``pdf_file`` joined together.

    ``pdf_file`` is handed to ``PyPDF2.PdfReader``; the ``extract_text()``
    result of each page is concatenated in page order with no separator.

    Any exception raised while opening the file or extracting a page is
    reported through ``st.error`` and an empty string is returned instead.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        return "".join(page.extract_text() for page in reader.pages)
    except Exception as exc:
        st.error(f"Error reading PDF: {exc}")
        return ""
# Function to split text into smaller chunks
def chunk_text(text, max_tokens=5000):
    """Split ``text`` into whitespace-delimited chunks of bounded size.

    Args:
        text: The string to split. Words are obtained with ``str.split()``,
            so any run of whitespace acts as a separator and is normalised
            to a single space in the output.
        max_tokens: Size budget per chunk. Despite the name this is a
            character count, not a model-token count: every word costs
            ``len(word) + 1`` (the word plus one separating space).

    Returns:
        A list of non-empty strings. A chunk is closed as soon as adding the
        next word would push its cost above ``max_tokens``. A single word
        whose own cost exceeds the budget is never split; it is emitted as a
        chunk of its own. Empty or whitespace-only input yields ``[]``.
    """
    chunks = []
    current_chunk = []
    current_size = 0

    for word in text.split():
        word_size = len(word) + 1  # word plus the space that follows it

        # Only flush when something has been collected. Without this guard an
        # oversized first word would flush the still-empty buffer and emit a
        # spurious "" chunk.
        if current_chunk and current_size + word_size > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_size = 0

        current_chunk.append(word)
        current_size += word_size

    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
# Function to generate questions from text using Groq API
def generate_questions_from_text(text):
    """Generate questions for ``text`` by querying the Groq chat model per chunk.

    ``text`` is first split with ``chunk_text(text, max_tokens=5000)``. Each
    chunk is sent as a single user message to the
    ``llama-3.3-70b-versatile`` model (non-streaming), and the reply is
    recorded under a ``Chunk N Questions:`` heading, with N counted from 1.

    A chunk whose request or response handling raises is reported through
    ``st.error`` and skipped; the remaining chunks are still processed.

    Returns:
        The per-chunk sections joined by blank lines, or an empty string when
        no chunk produced a section.
    """
    sections = []
    for number, piece in enumerate(chunk_text(text, max_tokens=5000), start=1):
        prompt = f"Generate a list of questions based on the following content:\n{piece}"
        try:
            response = client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=[{"role": "user", "content": prompt}],
                stream=False,
            )
            answer = response.choices[0].message.content
            sections.append(f"Chunk {number} Questions:\n{answer}")
        except Exception as exc:
            st.error(f"Error processing chunk {number}: {exc}")
    return "\n\n".join(sections)
# Streamlit app UI and logic
st.title("Generate Questions from Your PDF")
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is None:
    # Nothing has been uploaded yet: just prompt the user.
    st.info("Please upload a PDF to start.")
else:
    st.write("### Uploaded PDF:")
    text = extract_text_from_pdf(uploaded_file)

    if text.strip():
        # Show what was extracted and offer question generation.
        st.text_area("Extracted Text", text, height=300)

        if st.button("Generate Questions"):
            with st.spinner("Generating questions..."):
                questions = generate_questions_from_text(text)

            if not questions.strip():
                st.error("No questions could be generated from the content.")
            else:
                st.write("### Generated Questions:")
                st.write(questions)
    else:
        # Extraction failed or the PDF contained only whitespace.
        st.error("No text could be extracted from the uploaded PDF.")