Spaces:
Build error
Build error
| import os | |
| import streamlit as st | |
| from groq import Groq | |
| import PyPDF2 | |
# Module-level Groq API client used by generate_questions_from_text; the key
# is taken from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; the app passes the
            object returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined with newlines, or ``""`` when the PDF
        cannot be read (the error is reported to the user via ``st.error``).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # A page may yield no text (e.g. scanned images); treat that as an
        # empty string instead of failing on ``str + None``.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    except Exception as e:
        # Best-effort boundary: surface any parsing failure in the UI and let
        # the caller handle the empty result.
        st.error(f"Error reading PDF: {e}")
        return ""
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next.
    return "\n".join(page_texts)


def chunk_text(text, max_tokens=5000):
    """Split *text* into whitespace-delimited chunks of bounded size.

    Despite the parameter name, the budget is measured in characters, not
    model tokens: every word costs ``len(word) + 1`` (the word plus one
    separating space).

    Args:
        text: The text to split. Runs of whitespace are collapsed because the
            text is split with ``str.split()`` and re-joined with single
            spaces.
        max_tokens: Maximum accumulated cost of a chunk. A single word whose
            cost exceeds the budget is kept whole in a chunk of its own.

    Returns:
        A list of non-empty chunk strings; ``[]`` for empty or
        whitespace-only input.
    """
    chunks = []
    current_chunk = []
    current_size = 0
    for word in text.split():
        cost = len(word) + 1  # Account for word + space
        # Only flush when there is something to flush; otherwise an oversized
        # first word would produce a spurious empty chunk.
        if current_chunk and current_size + cost > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_size = 0
        current_chunk.append(word)
        current_size += cost
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks


def generate_questions_from_text(text, model="llama-3.3-70b-versatile", max_tokens=5000):
    """Generate questions for *text* by querying the Groq chat API per chunk.

    The text is split with ``chunk_text`` and one chat completion is requested
    for each chunk. A chunk that fails or returns no content is reported in
    the Streamlit UI and skipped, so the remaining chunks are still processed.

    Args:
        text: The source text to generate questions from.
        model: Name of the Groq chat model to use.
        max_tokens: Chunk size budget forwarded to ``chunk_text``.

    Returns:
        The per-chunk answers, each prefixed with ``"Chunk N Questions:"`` and
        separated by blank lines; ``""`` when nothing could be generated.
    """
    all_questions = []
    for i, chunk in enumerate(chunk_text(text, max_tokens=max_tokens), start=1):
        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Generate a list of questions based on the following content:\n{chunk}",
                    }
                ],
                model=model,
                stream=False,
            )
            questions = chat_completion.choices[0].message.content
        except Exception as e:
            # Best-effort: one failing chunk must not abort the others.
            st.error(f"Error processing chunk {i}: {e}")
            continue
        # The API may return no content; do not render a literal "None".
        if not questions or not questions.strip():
            st.warning(f"No questions were returned for chunk {i}.")
            continue
        all_questions.append(f"Chunk {i} Questions:\n{questions}")
    return "\n\n".join(all_questions)


# Streamlit page: upload a PDF, preview its extracted text, and generate
# questions from it on demand.
st.title("Generate Questions from Your PDF")
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is None:
    st.info("Please upload a PDF to start.")
else:
    st.write("### Uploaded PDF:")
    text = extract_text_from_pdf(uploaded_file)
    if text.strip():
        st.text_area("Extracted Text", text, height=300)
        if st.button("Generate Questions"):
            # NOTE(review): the original indentation was lost; the result
            # display is assumed to follow the spinner block — confirm.
            with st.spinner("Generating questions..."):
                questions = generate_questions_from_text(text)
            if questions.strip():
                st.write("### Generated Questions:")
                st.write(questions)
            else:
                st.error("No questions could be generated from the content.")
    else:
        st.error("No text could be extracted from the uploaded PDF.")