hussainraza committed on
Commit
33a30b4
·
verified ·
1 Parent(s): 513de68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -16
app.py CHANGED
@@ -1,5 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def chunk_text(text, max_tokens=5000):
2
- """Split text into smaller chunks."""
3
  words = text.split()
4
  chunks = []
5
  current_chunk = []
@@ -17,23 +39,51 @@ def chunk_text(text, max_tokens=5000):
17
  chunks.append(" ".join(current_chunk))
18
  return chunks
19
 
 
20
  def generate_questions_from_text(text):
21
- """Generate questions by processing text in chunks."""
22
  chunks = chunk_text(text, max_tokens=5000)
23
  all_questions = []
24
 
25
- for chunk in chunks:
26
- chat_completion = client.chat.completions.create(
27
- messages=[
28
- {
29
- "role": "user",
30
- "content": f"Generate a list of questions based on the following content:\n{chunk}",
31
- }
32
- ],
33
- model="llama-3.3-70b-versatile",
34
- stream=False,
35
- )
36
- questions = chat_completion.choices[0].message.content
37
- all_questions.append(questions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- return "\n".join(all_questions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from groq import Groq
4
+ import PyPDF2
5
+
6
# Initialize the Groq client.
# The API key is read from the GROQ_API_KEY environment variable; if it is
# unset, Groq() receives None and API calls will fail at request time.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
10
+
11
# Function to extract text from uploaded PDF
def extract_text_from_pdf(pdf_file):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        pdf_file: A binary file-like object containing a PDF
            (e.g. a Streamlit ``UploadedFile``).

    Returns:
        The concatenated text of all pages, or "" if the PDF could not
        be read at all.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # extract_text() may return None for pages with no extractable
        # text (scanned images, unusual encodings) -- coalesce to "" so
        # the concatenation never raises TypeError, as the original
        # `text += page.extract_text()` would.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
    except Exception as e:
        # Surface the failure in the UI and fall back to an empty string,
        # which the caller treats as "nothing extracted".
        st.error(f"Error reading PDF: {e}")
        return ""
22
+
23
+ # Function to split text into smaller chunks
24
  def chunk_text(text, max_tokens=5000):
 
25
  words = text.split()
26
  chunks = []
27
  current_chunk = []
 
39
  chunks.append(" ".join(current_chunk))
40
  return chunks
41
 
42
# Function to generate questions from text using Groq API
def generate_questions_from_text(text):
    """Generate questions for *text* by sending each chunk to the Groq API.

    The text is split via ``chunk_text`` and one chat-completion request is
    made per chunk. Chunks that fail are reported via ``st.error`` and
    skipped; the remaining results are joined into one labelled string.
    """
    all_questions = []
    for index, piece in enumerate(chunk_text(text, max_tokens=5000), start=1):
        try:
            response = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": f"Generate a list of questions based on the following content:\n{piece}",
                    }
                ],
                model="llama-3.3-70b-versatile",
                stream=False,
            )
            generated = response.choices[0].message.content
        except Exception as exc:
            # Best-effort: report the failed chunk and keep going.
            st.error(f"Error processing chunk {index}: {exc}")
            continue
        all_questions.append(f"Chunk {index} Questions:\n{generated}")

    return "\n\n".join(all_questions)
65
+
66
# Streamlit app UI and logic
st.title("Generate Questions from Your PDF")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is None:
    # Nothing uploaded yet -- prompt the user and render nothing else.
    st.info("Please upload a PDF to start.")
else:
    st.write("### Uploaded PDF:")
    text = extract_text_from_pdf(uploaded_file)

    if not text.strip():
        st.error("No text could be extracted from the uploaded PDF.")
    else:
        st.text_area("Extracted Text", text, height=300)

        if st.button("Generate Questions"):
            with st.spinner("Generating questions..."):
                questions = generate_questions_from_text(text)
                if questions.strip():
                    st.write("### Generated Questions:")
                    st.write(questions)
                else:
                    st.error("No questions could be generated from the content.")