File size: 2,830 Bytes
33a30b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ff52e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33a30b4
 
1ff52e0
33a30b4
 
 
 
 
 
513de68
33a30b4
 
 
 
 
1ff52e0
 
 
 
 
 
33a30b4
1ff52e0
33a30b4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import os
import streamlit as st
from groq import Groq
import PyPDF2

# Shared Groq API client; the key is taken from the GROQ_API_KEY environment
# variable (None is passed through when the variable is unset).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Function to extract text from uploaded PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF.

    Args:
        pdf_file: The uploaded PDF, passed straight to ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages joined by newlines, or ``""`` when the PDF
        could not be read (the failure is reported through ``st.error``).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # ``or ""`` keeps a page that yields no text from aborting the whole
        # extraction.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
        # Join with a newline so the last word of one page is not fused with
        # the first word of the next when the text is later split into words.
        return "\n".join(page_texts)
    except Exception as e:
        # UI boundary: show the parsing failure to the user instead of
        # crashing the app.
        st.error(f"Error reading PDF: {e}")
        return ""

# Function to split text into smaller chunks
def chunk_text(text, max_tokens=5000):
    """Split text into whitespace-delimited chunks of bounded size.

    Despite the parameter name, size is measured in characters, not model
    tokens: every word costs ``len(word) + 1`` (the word plus one
    separating space).

    Args:
        text: The text to split; it is broken on any whitespace.
        max_tokens: Maximum accumulated cost of the words in one chunk.

    Returns:
        A list of non-empty chunks, each made of words joined by single
        spaces. A single word whose cost exceeds ``max_tokens`` is kept
        whole as its own chunk. Empty or whitespace-only input gives ``[]``.
    """
    chunks = []
    current_chunk = []
    current_size = 0

    for word in text.split():
        cost = len(word) + 1  # Account for word + space
        # Only flush when there is something to flush; otherwise an
        # oversized first word would emit a spurious empty chunk.
        if current_chunk and current_size + cost > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_size = 0
        current_chunk.append(word)
        current_size += cost

    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

# Function to generate questions from text using Groq API
def generate_questions_from_text(text):
    """Generate questions for each chunk of ``text`` with the Groq chat API.

    The text is split with ``chunk_text(text, max_tokens=5000)`` and one chat
    completion is requested per chunk. A failure on one chunk is reported
    through ``st.error`` and the remaining chunks are still processed.

    Args:
        text: The source text to generate questions from.

    Returns:
        The per-chunk responses, each prefixed with ``"Chunk N Questions:"``
        and separated by blank lines; ``""`` when nothing was generated.
    """
    # Drop blank chunks up front so no request is spent on empty content and
    # the chunk numbering shown to the user stays contiguous.
    chunks = [
        chunk for chunk in chunk_text(text, max_tokens=5000) if chunk.strip()
    ]
    all_questions = []

    for number, chunk in enumerate(chunks, start=1):
        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Generate a list of questions based on the following content:\n{chunk}",
                    }
                ],
                model="llama-3.3-70b-versatile",
                stream=False,
            )
            questions = chat_completion.choices[0].message.content
        except Exception as e:
            # Keep going: one failed chunk should not discard the others.
            st.error(f"Error processing chunk {number}: {e}")
            continue

        if not questions:
            # An empty/None response would otherwise be rendered as the
            # literal text "None" in the output.
            st.warning(f"No questions were returned for chunk {number}.")
            continue

        all_questions.append(f"Chunk {number} Questions:\n{questions}")

    return "\n\n".join(all_questions)

# Streamlit page: upload a PDF, preview its extracted text, and generate
# questions from it on demand.
st.title("Generate Questions from Your PDF")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is None:
    # Nothing uploaded yet: just prompt the user.
    st.info("Please upload a PDF to start.")
else:
    st.write("### Uploaded PDF:")
    text = extract_text_from_pdf(uploaded_file)

    if text.strip():
        st.text_area("Extracted Text", text, height=300)

        if st.button("Generate Questions"):
            with st.spinner("Generating questions..."):
                questions = generate_questions_from_text(text)
                if not questions.strip():
                    st.error("No questions could be generated from the content.")
                else:
                    st.write("### Generated Questions:")
                    st.write(questions)
    else:
        # Extraction failed or the PDF has no extractable text.
        st.error("No text could be extracted from the uploaded PDF.")