167AliRaza committed on
Commit
238717d
·
verified ·
1 Parent(s): 65956db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -31
app.py CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
8
  import google.generativeai as genai
9
  from typing import List, Tuple
10
  import time
 
11
 
12
  # Configure Gemini API
13
  def configure_gemini_api(api_key: str):
@@ -47,20 +48,23 @@ def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
47
  ]
48
 
49
  prompt = f"""
50
- Generate 10 multiple choice questions from the following text.
51
  Each question must have:
52
  - A clear, specific question
53
  - 4 options labeled A, B, C, D
54
  - One correct answer (A, B, C, or D)
55
 
56
- Format your response as CSV with headers: Question,OptionA,OptionB,OptionC,OptionD,CorrectAnswer
 
57
 
58
- Important formatting rules:
 
59
  - Use commas only as field separators
60
  - If any field contains a comma, wrap it in double quotes
61
- - Each row should be on a new line
62
- - Make questions specific and clear
63
- - Ensure options are distinct and plausible
 
64
 
65
  Text to analyze:
66
  {chunk}
@@ -85,32 +89,41 @@ def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
85
 
86
  if response and response.text:
87
  output = response.text.strip()
88
- lines = output.splitlines()
89
 
90
- # Skip header if present
91
- for line in lines[1:] if lines and 'Question' in lines[0] else lines:
92
- if line.strip():
93
- # Simple CSV parsing (you might want to use csv module for better handling)
94
- parts = []
95
- current_part = ""
96
- in_quotes = False
97
-
98
- for char in line:
99
- if char == '"':
100
- in_quotes = not in_quotes
101
- elif char == ',' and not in_quotes:
102
- parts.append(current_part.strip().strip('"'))
103
- current_part = ""
104
- else:
105
- current_part += char
106
-
107
- # Add the last part
108
- if current_part:
109
- parts.append(current_part.strip().strip('"'))
110
 
111
- if len(parts) >= 6 and parts[0].strip():
112
- mcq_data.append(parts[:6])
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
 
114
  return mcq_data
115
 
116
  def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress=gr.Progress()) -> Tuple[str, str]:
@@ -154,8 +167,17 @@ def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress
154
  if not all_mcq_data:
155
  return "❌ No MCQs could be generated from the PDF content", None
156
 
 
 
 
 
 
 
 
 
 
157
  # Create DataFrame
158
- df = pd.DataFrame(all_mcq_data, columns=['Question', 'OptionA', 'OptionB', 'OptionC', 'OptionD', 'CorrectAnswer'])
159
 
160
  # Create temporary Excel file for download
161
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx', mode='wb')
@@ -167,7 +189,7 @@ def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress
167
 
168
  progress(1.0, desc="Complete!")
169
 
170
- success_message = f"✅ Successfully generated {len(all_mcq_data)} MCQs from {total_chunks} text chunks!"
171
 
172
  return success_message, temp_file.name
173
 
 
8
  import google.generativeai as genai
9
  from typing import List, Tuple
10
  import time
11
+ import csv
12
 
13
  # Configure Gemini API
14
  def configure_gemini_api(api_key: str):
 
48
  ]
49
 
50
  prompt = f"""
51
+ Generate exactly 10 multiple choice questions from the following text.
52
  Each question must have:
53
  - A clear, specific question
54
  - 4 options labeled A, B, C, D
55
  - One correct answer (A, B, C, or D)
56
 
57
+ IMPORTANT: Do NOT include any headers or column names in your response.
58
+ Format each question as: Question,OptionA,OptionB,OptionC,OptionD,CorrectAnswer
59
 
60
+ Rules:
61
+ - Start directly with the first question, no headers
62
  - Use commas only as field separators
63
  - If any field contains a comma, wrap it in double quotes
64
+ - Each question should be on a new line
65
+ - Make questions specific and clear based on the text content
66
+ - Ensure all 4 options are plausible but only one is correct
67
+ - The correct answer should be A, B, C, or D only
68
 
69
  Text to analyze:
70
  {chunk}
 
89
 
90
  if response and response.text:
91
  output = response.text.strip()
92
+ lines = [line.strip() for line in output.splitlines() if line.strip()]
93
 
94
+ for line in lines:
95
+ # Skip any header lines that might still appear
96
+ if ('Question' in line and 'OptionA' in line and 'OptionB' in line) or line.startswith('Question,'):
97
+ continue
98
+
99
+ # Skip empty lines or lines that don't look like MCQs
100
+ if not line or line.count(',') < 5:
101
+ continue
102
+
103
+ # Parse CSV line using proper CSV parsing
104
+ import csv
105
+ try:
106
+ # Pass the line as a one-item list so csv.reader parses it as a single CSV row
107
+ csv_reader = csv.reader([line])
108
+ parts = next(csv_reader)
 
 
 
 
 
109
 
110
+ # Ensure we have at least 6 parts (extra fields are dropped below) and the question is not empty
111
+ if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
112
+ # Clean up each part
113
+ cleaned_parts = [part.strip() for part in parts[:6]]
114
+ # Validate that correct answer is A, B, C, or D
115
+ if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
116
+ mcq_data.append(cleaned_parts)
117
+
118
+ except csv.Error:
119
+ # Fallback to simple split if CSV parsing fails
120
+ parts = line.split(',')
121
+ if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
122
+ cleaned_parts = [part.strip().strip('"') for part in parts[:6]]
123
+ if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
124
+ mcq_data.append(cleaned_parts)
125
 
126
+ print(f"Generated {len(mcq_data)} MCQs from chunk")
127
  return mcq_data
128
 
129
  def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress=gr.Progress()) -> Tuple[str, str]:
 
167
  if not all_mcq_data:
168
  return "❌ No MCQs could be generated from the PDF content", None
169
 
170
+ # Remove any duplicate questions
171
+ seen_questions = set()
172
+ unique_mcq_data = []
173
+ for mcq in all_mcq_data:
174
+ question_text = mcq[0].lower().strip()
175
+ if question_text not in seen_questions:
176
+ seen_questions.add(question_text)
177
+ unique_mcq_data.append(mcq)
178
+
179
  # Create DataFrame
180
+ df = pd.DataFrame(unique_mcq_data, columns=['Question', 'OptionA', 'OptionB', 'OptionC', 'OptionD', 'CorrectAnswer'])
181
 
182
  # Create temporary Excel file for download
183
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx', mode='wb')
 
189
 
190
  progress(1.0, desc="Complete!")
191
 
192
+ success_message = f"✅ Successfully generated {len(unique_mcq_data)} unique MCQs from {total_chunks} text chunks!"
193
 
194
  return success_message, temp_file.name
195