Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import gradio as gr
|
|
| 8 |
import google.generativeai as genai
|
| 9 |
from typing import List, Tuple
|
| 10 |
import time
|
|
|
|
| 11 |
|
| 12 |
# Configure Gemini API
|
| 13 |
def configure_gemini_api(api_key: str):
|
|
@@ -47,20 +48,23 @@ def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
|
|
| 47 |
]
|
| 48 |
|
| 49 |
prompt = f"""
|
| 50 |
-
Generate 10 multiple choice questions from the following text.
|
| 51 |
Each question must have:
|
| 52 |
- A clear, specific question
|
| 53 |
- 4 options labeled A, B, C, D
|
| 54 |
- One correct answer (A, B, C, or D)
|
| 55 |
|
| 56 |
-
|
|
|
|
| 57 |
|
| 58 |
-
|
|
|
|
| 59 |
- Use commas only as field separators
|
| 60 |
- If any field contains a comma, wrap it in double quotes
|
| 61 |
-
- Each
|
| 62 |
-
- Make questions specific and clear
|
| 63 |
-
- Ensure options are
|
|
|
|
| 64 |
|
| 65 |
Text to analyze:
|
| 66 |
{chunk}
|
|
@@ -85,32 +89,41 @@ def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
|
|
| 85 |
|
| 86 |
if response and response.text:
|
| 87 |
output = response.text.strip()
|
| 88 |
-
lines = output.splitlines()
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
if line.
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
current_part += char
|
| 106 |
-
|
| 107 |
-
# Add the last part
|
| 108 |
-
if current_part:
|
| 109 |
-
parts.append(current_part.strip().strip('"'))
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
|
|
|
| 114 |
return mcq_data
|
| 115 |
|
| 116 |
def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress=gr.Progress()) -> Tuple[str, str]:
|
|
@@ -154,8 +167,17 @@ def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress
|
|
| 154 |
if not all_mcq_data:
|
| 155 |
return "❌ No MCQs could be generated from the PDF content", None
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
# Create DataFrame
|
| 158 |
-
df = pd.DataFrame(
|
| 159 |
|
| 160 |
# Create temporary Excel file for download
|
| 161 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx', mode='wb')
|
|
@@ -167,7 +189,7 @@ def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress
|
|
| 167 |
|
| 168 |
progress(1.0, desc="Complete!")
|
| 169 |
|
| 170 |
-
success_message = f"✅ Successfully generated {len(
|
| 171 |
|
| 172 |
return success_message, temp_file.name
|
| 173 |
|
|
|
|
| 8 |
import google.generativeai as genai
|
| 9 |
from typing import List, Tuple
|
| 10 |
import time
|
| 11 |
+
import csv
|
| 12 |
|
| 13 |
# Configure Gemini API
|
| 14 |
def configure_gemini_api(api_key: str):
|
|
|
|
| 48 |
]
|
| 49 |
|
| 50 |
prompt = f"""
|
| 51 |
+
Generate exactly 10 multiple choice questions from the following text.
|
| 52 |
Each question must have:
|
| 53 |
- A clear, specific question
|
| 54 |
- 4 options labeled A, B, C, D
|
| 55 |
- One correct answer (A, B, C, or D)
|
| 56 |
|
| 57 |
+
IMPORTANT: Do NOT include any headers or column names in your response.
|
| 58 |
+
Format each question as: Question,OptionA,OptionB,OptionC,OptionD,CorrectAnswer
|
| 59 |
|
| 60 |
+
Rules:
|
| 61 |
+
- Start directly with the first question, no headers
|
| 62 |
- Use commas only as field separators
|
| 63 |
- If any field contains a comma, wrap it in double quotes
|
| 64 |
+
- Each question should be on a new line
|
| 65 |
+
- Make questions specific and clear based on the text content
|
| 66 |
+
- Ensure all 4 options are plausible but only one is correct
|
| 67 |
+
- The correct answer should be A, B, C, or D only
|
| 68 |
|
| 69 |
Text to analyze:
|
| 70 |
{chunk}
|
|
|
|
| 89 |
|
| 90 |
if response and response.text:
|
| 91 |
output = response.text.strip()
|
| 92 |
+
lines = [line.strip() for line in output.splitlines() if line.strip()]
|
| 93 |
|
| 94 |
+
for line in lines:
|
| 95 |
+
# Skip any header lines that might still appear
|
| 96 |
+
if ('Question' in line and 'OptionA' in line and 'OptionB' in line) or line.startswith('Question,'):
|
| 97 |
+
continue
|
| 98 |
+
|
| 99 |
+
# Skip empty lines or lines that don't look like MCQs
|
| 100 |
+
if not line or line.count(',') < 5:
|
| 101 |
+
continue
|
| 102 |
+
|
| 103 |
+
# Parse CSV line using proper CSV parsing
|
| 104 |
+
import csv
|
| 105 |
+
try:
|
| 106 |
+
# Parse the single line with csv.reader so quoted fields containing commas stay intact
|
| 107 |
+
csv_reader = csv.reader([line])
|
| 108 |
+
parts = next(csv_reader)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
+
# Require at least 6 parts (only the first 6 are kept), a non-empty question, and no leftover header row
|
| 111 |
+
if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
|
| 112 |
+
# Clean up each part
|
| 113 |
+
cleaned_parts = [part.strip() for part in parts[:6]]
|
| 114 |
+
# Validate that correct answer is A, B, C, or D
|
| 115 |
+
if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
|
| 116 |
+
mcq_data.append(cleaned_parts)
|
| 117 |
+
|
| 118 |
+
except csv.Error:
|
| 119 |
+
# Fallback to simple split if CSV parsing fails
|
| 120 |
+
parts = line.split(',')
|
| 121 |
+
if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
|
| 122 |
+
cleaned_parts = [part.strip().strip('"') for part in parts[:6]]
|
| 123 |
+
if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
|
| 124 |
+
mcq_data.append(cleaned_parts)
|
| 125 |
|
| 126 |
+
print(f"Generated {len(mcq_data)} MCQs from chunk")
|
| 127 |
return mcq_data
|
| 128 |
|
| 129 |
def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress=gr.Progress()) -> Tuple[str, str]:
|
|
|
|
| 167 |
if not all_mcq_data:
|
| 168 |
return "❌ No MCQs could be generated from the PDF content", None
|
| 169 |
|
| 170 |
+
# Remove any duplicate questions
|
| 171 |
+
seen_questions = set()
|
| 172 |
+
unique_mcq_data = []
|
| 173 |
+
for mcq in all_mcq_data:
|
| 174 |
+
question_text = mcq[0].lower().strip()
|
| 175 |
+
if question_text not in seen_questions:
|
| 176 |
+
seen_questions.add(question_text)
|
| 177 |
+
unique_mcq_data.append(mcq)
|
| 178 |
+
|
| 179 |
# Create DataFrame
|
| 180 |
+
df = pd.DataFrame(unique_mcq_data, columns=['Question', 'OptionA', 'OptionB', 'OptionC', 'OptionD', 'CorrectAnswer'])
|
| 181 |
|
| 182 |
# Create temporary Excel file for download
|
| 183 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx', mode='wb')
|
|
|
|
| 189 |
|
| 190 |
progress(1.0, desc="Complete!")
|
| 191 |
|
| 192 |
+
success_message = f"✅ Successfully generated {len(unique_mcq_data)} unique MCQs from {total_chunks} text chunks!"
|
| 193 |
|
| 194 |
return success_message, temp_file.name
|
| 195 |
|