Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -39,8 +39,12 @@ def chunk_text(text: str, chunk_size: int = 1500) -> List[str]:
|
|
| 39 |
chunks.append(' '.join(words[i:i+chunk_size]))
|
| 40 |
return chunks
|
| 41 |
|
| 42 |
-
def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
|
| 43 |
"""Generate MCQs from a text chunk using Gemini API"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
models_to_try = [
|
| 45 |
'gemini-2.0-flash-exp',
|
| 46 |
'gemini-1.5-flash',
|
|
@@ -78,34 +82,45 @@ def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
|
|
| 78 |
|
| 79 |
for model_name in models_to_try:
|
| 80 |
try:
|
|
|
|
| 81 |
model = genai.GenerativeModel(model_name)
|
| 82 |
response = model.generate_content(prompt)
|
| 83 |
|
| 84 |
if response.text:
|
|
|
|
| 85 |
break
|
| 86 |
except Exception as e:
|
| 87 |
-
print(f"Error with {model_name}: {e}")
|
| 88 |
continue
|
| 89 |
|
| 90 |
if response and response.text:
|
| 91 |
output = response.text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
lines = [line.strip() for line in output.splitlines() if line.strip()]
|
|
|
|
| 93 |
|
| 94 |
-
for line in lines:
|
|
|
|
|
|
|
| 95 |
# Skip any header lines that might still appear
|
| 96 |
if ('Question' in line and 'OptionA' in line and 'OptionB' in line) or line.startswith('Question,'):
|
|
|
|
| 97 |
continue
|
| 98 |
|
| 99 |
# Skip empty lines or lines that don't look like MCQs
|
| 100 |
if not line or line.count(',') < 5:
|
|
|
|
| 101 |
continue
|
| 102 |
|
| 103 |
# Parse CSV line using proper CSV parsing
|
| 104 |
-
import csv
|
| 105 |
try:
|
| 106 |
# Use StringIO to parse the line as CSV
|
| 107 |
csv_reader = csv.reader([line])
|
| 108 |
parts = next(csv_reader)
|
|
|
|
| 109 |
|
| 110 |
# Ensure we have exactly 6 parts and the question is not empty
|
| 111 |
if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
|
|
@@ -114,16 +129,27 @@ def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
|
|
| 114 |
# Validate that correct answer is A, B, C, or D
|
| 115 |
if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
|
| 116 |
mcq_data.append(cleaned_parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
-
except csv.Error:
|
|
|
|
| 119 |
# Fallback to simple split if CSV parsing fails
|
| 120 |
parts = line.split(',')
|
| 121 |
if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
|
| 122 |
cleaned_parts = [part.strip().strip('"') for part in parts[:6]]
|
| 123 |
if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
|
| 124 |
mcq_data.append(cleaned_parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
print(f"Generated {len(mcq_data)} MCQs from chunk")
|
| 127 |
return mcq_data
|
| 128 |
|
| 129 |
def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress=gr.Progress()) -> Tuple[str, str]:
|
|
|
|
| 39 |
chunks.append(' '.join(words[i:i+chunk_size]))
|
| 40 |
return chunks
|
| 41 |
|
| 42 |
+
def generate_mcqs_from_chunk(chunk: str, api_key: str, chunk_number: int = 1) -> List[List[str]]:
|
| 43 |
"""Generate MCQs from a text chunk using Gemini API"""
|
| 44 |
+
print(f"\n=== PROCESSING CHUNK {chunk_number} ===")
|
| 45 |
+
print(f"Chunk length: {len(chunk)} characters")
|
| 46 |
+
print(f"Chunk preview: {chunk[:200]}...")
|
| 47 |
+
|
| 48 |
models_to_try = [
|
| 49 |
'gemini-2.0-flash-exp',
|
| 50 |
'gemini-1.5-flash',
|
|
|
|
| 82 |
|
| 83 |
for model_name in models_to_try:
|
| 84 |
try:
|
| 85 |
+
print(f"Trying model: {model_name}")
|
| 86 |
model = genai.GenerativeModel(model_name)
|
| 87 |
response = model.generate_content(prompt)
|
| 88 |
|
| 89 |
if response.text:
|
| 90 |
+
print(f"✅ Successfully used model: {model_name}")
|
| 91 |
break
|
| 92 |
except Exception as e:
|
| 93 |
+
print(f"❌ Error with {model_name}: {e}")
|
| 94 |
continue
|
| 95 |
|
| 96 |
if response and response.text:
|
| 97 |
output = response.text.strip()
|
| 98 |
+
print(f"\n--- RAW AI RESPONSE FOR CHUNK {chunk_number} ---")
|
| 99 |
+
print(output)
|
| 100 |
+
print("--- END RAW RESPONSE ---\n")
|
| 101 |
+
|
| 102 |
lines = [line.strip() for line in output.splitlines() if line.strip()]
|
| 103 |
+
print(f"Total non-empty lines in response: {len(lines)}")
|
| 104 |
|
| 105 |
+
for idx, line in enumerate(lines):
|
| 106 |
+
print(f"Processing line {idx + 1}: {line[:100]}...")
|
| 107 |
+
|
| 108 |
# Skip any header lines that might still appear
|
| 109 |
if ('Question' in line and 'OptionA' in line and 'OptionB' in line) or line.startswith('Question,'):
|
| 110 |
+
print(f"β Skipped header line: {line[:50]}...")
|
| 111 |
continue
|
| 112 |
|
| 113 |
# Skip empty lines or lines that don't look like MCQs
|
| 114 |
if not line or line.count(',') < 5:
|
| 115 |
+
print(f"β Skipped invalid line (comma count: {line.count(',')}): {line[:50]}...")
|
| 116 |
continue
|
| 117 |
|
| 118 |
# Parse CSV line using proper CSV parsing
|
|
|
|
| 119 |
try:
|
| 120 |
# Use StringIO to parse the line as CSV
|
| 121 |
csv_reader = csv.reader([line])
|
| 122 |
parts = next(csv_reader)
|
| 123 |
+
print(f"Parsed parts: {len(parts)} fields")
|
| 124 |
|
| 125 |
# Ensure we have exactly 6 parts and the question is not empty
|
| 126 |
if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
|
|
|
|
| 129 |
# Validate that correct answer is A, B, C, or D
|
| 130 |
if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
|
| 131 |
mcq_data.append(cleaned_parts)
|
| 132 |
+
print(f"✅ Added MCQ: {cleaned_parts[0][:50]}... (Answer: {cleaned_parts[5]})")
|
| 133 |
+
else:
|
| 134 |
+
print(f"❌ Invalid answer format: {cleaned_parts[5]}")
|
| 135 |
+
else:
|
| 136 |
+
print(f"❌ Invalid parts count or empty question. Parts: {len(parts)}, First part: '{parts[0] if parts else 'N/A'}'")
|
| 137 |
|
| 138 |
+
except csv.Error as e:
|
| 139 |
+
print(f"❌ CSV parsing error: {e}")
|
| 140 |
# Fallback to simple split if CSV parsing fails
|
| 141 |
parts = line.split(',')
|
| 142 |
if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
|
| 143 |
cleaned_parts = [part.strip().strip('"') for part in parts[:6]]
|
| 144 |
if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
|
| 145 |
mcq_data.append(cleaned_parts)
|
| 146 |
+
print(f"✅ Added MCQ (fallback): {cleaned_parts[0][:50]}...")
|
| 147 |
+
else:
|
| 148 |
+
print(f"❌ Invalid answer format (fallback): {cleaned_parts[5]}")
|
| 149 |
+
else:
|
| 150 |
+
print(f"❌ No response received for chunk {chunk_number}")
|
| 151 |
|
| 152 |
+
print(f"Generated {len(mcq_data)} MCQs from chunk {chunk_number}")
|
| 153 |
return mcq_data
|
| 154 |
|
| 155 |
def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress=gr.Progress()) -> Tuple[str, str]:
|