167AliRaza committed on
Commit
c0e8eab
·
verified ·
1 Parent(s): 7e83cf1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from pdf2image import convert_from_path
4
+ import pytesseract
5
+ import google.generativeai as genai
6
+ from io import BytesIO
7
+
8
+ # Function: Extract text from PDF
9
def extract_text_from_pdf(pdf_file):
    """Return the OCR text of every page in a PDF, joined by newlines.

    Args:
        pdf_file: Path to the PDF, passed straight to ``convert_from_path``.

    Returns:
        The text recognised on all pages, one page after another, with
        leading and trailing whitespace removed. Empty string if the PDF
        produced no pages or no text.
    """
    page_images = convert_from_path(pdf_file)
    page_texts = [pytesseract.image_to_string(image) for image in page_images]
    return "\n".join(page_texts).strip()
16
+
17
+ # Function: Chunk text
18
def chunk_text(text, chunk_size=1500):
    """Split text into whitespace-normalised chunks of at most ``chunk_size`` words.

    Args:
        text: The text to split. Words are delimited by any run of whitespace.
        chunk_size: Maximum number of words per chunk; must be at least 1.

    Returns:
        A lazy iterator of strings, each holding up to ``chunk_size`` words
        joined by single spaces. Empty or whitespace-only text yields nothing.

    Raises:
        ValueError: If ``chunk_size`` is less than 1. Without this check a
            negative size would silently yield no chunks (dropping all the
            text) and zero would fail later with an unrelated ``range`` error.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    words = text.split()
    return (
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    )
22
+
23
+ # Models to try (fallbacks)
24
# Gemini model names, tried in this order; the first one that returns text wins.
models_to_try = [
    "gemini-2.5-flash-lite",
    "gemini-2.5-flash",
    "gemini-2.5-pro",
    "gemini-2.0-flash-lite",
    "gemini-2.0-flash",
    "gemini-1.5-flash",
    "gemini-1.5-pro",
]
33
+
34
+ # Function: Generate MCQs
35
def _parse_mcq_rows(csv_text):
    """Extract well-formed 6-field MCQ rows from the model's CSV-style reply."""
    import csv

    rows = []
    for line in csv_text.splitlines():
        try:
            # Parse line by line so one unbalanced quote cannot swallow the
            # rest of the reply; csv honours quoted fields containing commas.
            parsed = list(csv.reader([line], skipinitialspace=True))
        except csv.Error:
            continue
        for fields in parsed:
            fields = [field.strip() for field in fields]
            if len(fields) != 6 or not fields[0]:
                continue  # code fences, blank lines, malformed rows
            if fields[0].lower() == "question":
                continue  # header row echoed back by the model
            rows.append(fields)
    return rows


def generate_mcqs(text, api_key):
    """Generate multiple-choice questions from text using Gemini models.

    The text is split into 1500-word chunks. For each chunk the models in
    ``models_to_try`` are tried in order until one returns non-empty text,
    which is then parsed as CSV rows of
    ``Question,OptionA,OptionB,OptionC,OptionD,CorrectAnswer``.

    Args:
        text: Source text to build questions from.
        api_key: Gemini API key passed to ``genai.configure``.

    Returns:
        ``(df, preview)`` where ``df`` is a DataFrame with one row per MCQ and
        ``preview`` is a Markdown table of the first 10 rows, or
        ``(None, None)`` when the text is empty or no valid row was produced.
    """
    import logging

    # Nothing to ask the model about; skip configuring the API entirely.
    if not text or not text.strip():
        return None, None

    logger = logging.getLogger(__name__)
    genai.configure(api_key=api_key)
    mcq_rows = []

    for chunk in chunk_text(text, 1500):
        prompt = f"""
        Generate 10 MCQs from the following text.
        Each question must have:
        - Question
        - 4 Options (A-D)
        - Correct Answer
        Return in CSV format: Question,OptionA,OptionB,OptionC,OptionD,CorrectAnswer.
        Text:\n{chunk}
        """

        reply_text = None
        for model_name in models_to_try:
            try:
                model = genai.GenerativeModel(model_name)
                # Read .text inside the try: the accessor itself can raise
                # when the response carries no usable content.
                candidate = model.generate_content(prompt).text
            except Exception as exc:
                # Best-effort fallback: record why this model was skipped
                # instead of hiding it, then move on to the next one.
                logger.warning("Model %s failed: %s", model_name, exc)
                continue
            if candidate:
                reply_text = candidate
                break

        if reply_text:
            mcq_rows.extend(_parse_mcq_rows(reply_text.strip()))

    if not mcq_rows:
        return None, None

    df = pd.DataFrame(
        mcq_rows,
        columns=["Question", "OptionA", "OptionB", "OptionC", "OptionD", "CorrectAnswer"],
    )
    return df, df.head(10).to_markdown()  # Show preview
74
+
75
+ # Gradio pipeline
76
def process_pdf(pdf_file, api_key):
    """Gradio callback: OCR the uploaded PDF, generate MCQs, and export them.

    Args:
        pdf_file: The uploaded PDF as delivered by the ``gr.File`` input;
            either a path string or an object exposing the path as ``.name``.
        api_key: Gemini API key typed by the user.

    Returns:
        ``(preview, excel_path)`` on success, where ``preview`` is a Markdown
        table of the first MCQs and ``excel_path`` is the path of a temporary
        ``.xlsx`` file for the download component. On any failure the first
        element is a human-readable message and the second is ``None``.
    """
    import tempfile

    if not api_key or not api_key.strip():
        return "❌ Please enter your Gemini API key.", None
    if pdf_file is None:
        return "❌ Please upload a PDF file.", None

    # Accept both a plain path string and a file-like wrapper carrying .name.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        text = extract_text_from_pdf(pdf_path)
        # Strip stray whitespace that often comes along with a pasted key.
        df, preview = generate_mcqs(text, api_key.strip())

        if df is None:
            return "❌ No valid MCQs generated.", None

        # The File output component needs a path on disk, not an in-memory
        # buffer, so reserve a temp file name and let pandas write to it.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            excel_path = tmp.name
        df.to_excel(excel_path, index=False)

        return preview, excel_path
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        return f"Error: {str(e)}", None
95
+
96
+ # Gradio UI
97
with gr.Blocks() as demo:
    # Header and short usage description.
    gr.Markdown("## 📘 PDF to MCQ Generator (Gemini AI)")
    gr.Markdown(
        "Upload a PDF, enter your Gemini API key, extract text with OCR, and generate MCQs saved as Excel."
    )

    # Inputs: the key is masked, uploads are restricted to PDFs.
    api_key = gr.Textbox(type="password", label="Enter your Gemini API Key")
    pdf_input = gr.File(file_types=[".pdf"], label="Upload PDF")
    generate_btn = gr.Button("Generate MCQs")

    # Outputs: text preview plus the downloadable spreadsheet.
    preview_output = gr.Textbox(lines=15, label="Preview (First 10 MCQs)")
    excel_output = gr.File(label="Download Excel (.xlsx)")

    # Wire the button to the processing pipeline.
    generate_btn.click(
        fn=process_pdf,
        inputs=[pdf_input, api_key],
        outputs=[preview_output, excel_output],
    )

# Run app
if __name__ == "__main__":
    demo.launch()