import os

import PyPDF2
import streamlit as st
from groq import Groq

# Groq model used for question generation.
MODEL_NAME = "llama-3.3-70b-versatile"
# Default per-request chunk budget, measured in characters (see chunk_text).
CHUNK_SIZE = 5000

# Initialize Groq client
client = Groq(
    api_key=os.environ.get("GROQ_API_KEY"),
)


# Function to extract text from uploaded PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file*, one page per line group.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader`` (here, the Streamlit upload).

    Returns:
        The page texts joined with newlines, or ``""`` if the PDF could not
        be read (the error is reported through ``st.error``).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # ``or ""`` guards against a page that yields no text at all, which
        # would otherwise break the concatenation.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    except Exception as e:
        # UI boundary: report any parser failure to the user instead of
        # crashing the app.
        st.error(f"Error reading PDF: {e}")
        return ""
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next.
    return "\n".join(page_texts)


# Function to split text into smaller chunks
def chunk_text(text, max_tokens=5000):
    """Split *text* on whitespace into chunks of bounded size.

    Despite the parameter name, the budget is counted in characters: each
    word costs ``len(word) + 1`` (the word plus one separating space).

    Args:
        text: The text to split.
        max_tokens: Maximum budget per chunk, in characters as described
            above.

    Returns:
        A list of non-empty chunk strings with words joined by single
        spaces. A single word longer than the budget becomes a chunk of its
        own. Empty or whitespace-only input yields ``[]``.
    """
    chunks = []
    current_chunk = []
    current_tokens = 0

    for word in text.split():
        word_cost = len(word) + 1  # Account for word + space
        # Flush only when there is something to flush; otherwise an
        # over-long first word would produce an empty leading chunk.
        if current_chunk and current_tokens + word_cost > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_tokens = 0
        current_chunk.append(word)
        current_tokens += word_cost

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks


# Function to generate questions from text using Groq API
def generate_questions_from_text(text, model=MODEL_NAME, max_tokens=CHUNK_SIZE):
    """Ask the Groq chat API for questions about each chunk of *text*.

    Args:
        text: The source text.
        model: Groq model identifier to query.
        max_tokens: Chunk budget forwarded to ``chunk_text``.

    Returns:
        One ``"Chunk N Questions:"`` section per successfully processed
        chunk, joined by blank lines. Chunks whose request fails are
        reported through ``st.error`` and skipped; chunks with an empty
        response are skipped silently. Returns ``""`` if nothing was
        generated.
    """
    all_questions = []

    for i, chunk in enumerate(chunk_text(text, max_tokens=max_tokens), start=1):
        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Generate a list of questions based on the following content:\n{chunk}",
                    }
                ],
                model=model,
                stream=False,
            )
            questions = chat_completion.choices[0].message.content
        except Exception as e:
            # Keep going: one failed chunk should not discard the others.
            st.error(f"Error processing chunk {i}: {e}")
            continue

        # The response content can be missing or blank; do not render that
        # as a literal "None" section.
        if questions and questions.strip():
            all_questions.append(f"Chunk {i} Questions:\n{questions}")

    return "\n\n".join(all_questions)


# Streamlit app UI and logic
st.title("Generate Questions from Your PDF")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is not None:
    st.write("### Uploaded PDF:")
    text = extract_text_from_pdf(uploaded_file)

    if not text.strip():
        st.error("No text could be extracted from the uploaded PDF.")
    else:
        st.text_area("Extracted Text", text, height=300)

        if st.button("Generate Questions"):
            with st.spinner("Generating questions..."):
                questions = generate_questions_from_text(text)

            if questions.strip():
                st.write("### Generated Questions:")
                st.write(questions)
            else:
                st.error("No questions could be generated from the content.")
else:
    st.info("Please upload a PDF to start.")