Spaces:

167AliRaza
/

PDF-Processor

Runtime error

App Files Files Community

167AliRaza committed on Sep 18, 2025

Commit

41c3d7f

verified ·

1 Parent(s): 238717d

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -6

app.py CHANGED Viewed

@@ -39,8 +39,12 @@ def chunk_text(text: str, chunk_size: int = 1500) -> List[str]:
         chunks.append(' '.join(words[i:i+chunk_size]))
     return chunks
-def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
     """Generate MCQs from a text chunk using Gemini API"""
     models_to_try = [
         'gemini-2.0-flash-exp',
         'gemini-1.5-flash',
@@ -78,34 +82,45 @@ def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
     for model_name in models_to_try:
         try:
             model = genai.GenerativeModel(model_name)
             response = model.generate_content(prompt)
             if response.text:
                 break
         except Exception as e:
-            print(f"Error with {model_name}: {e}")
             continue
     if response and response.text:
         output = response.text.strip()
         lines = [line.strip() for line in output.splitlines() if line.strip()]
-        for line in lines:
             # Skip any header lines that might still appear
             if ('Question' in line and 'OptionA' in line and 'OptionB' in line) or line.startswith('Question,'):
                 continue
             # Skip empty lines or lines that don't look like MCQs
             if not line or line.count(',') < 5:
                 continue
             # Parse CSV line using proper CSV parsing
-            import csv
             try:
                 # Use StringIO to parse the line as CSV
                 csv_reader = csv.reader([line])
                 parts = next(csv_reader)
                 # Ensure we have exactly 6 parts and the question is not empty
                 if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
@@ -114,16 +129,27 @@ def generate_mcqs_from_chunk(chunk: str, api_key: str) -> List[List[str]]:
                     # Validate that correct answer is A, B, C, or D
                     if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
                         mcq_data.append(cleaned_parts)
-            except csv.Error:
                 # Fallback to simple split if CSV parsing fails
                 parts = line.split(',')
                 if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
                     cleaned_parts = [part.strip().strip('"') for part in parts[:6]]
                     if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
                         mcq_data.append(cleaned_parts)
-    print(f"Generated {len(mcq_data)} MCQs from chunk")
     return mcq_data
 def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress=gr.Progress()) -> Tuple[str, str]:

         chunks.append(' '.join(words[i:i+chunk_size]))
     return chunks
+def generate_mcqs_from_chunk(chunk: str, api_key: str, chunk_number: int = 1) -> List[List[str]]:
     """Generate MCQs from a text chunk using Gemini API"""
+    print(f"\n=== PROCESSING CHUNK {chunk_number} ===")
+    print(f"Chunk length: {len(chunk)} characters")
+    print(f"Chunk preview: {chunk[:200]}...")
     models_to_try = [
         'gemini-2.0-flash-exp',
         'gemini-1.5-flash',
     for model_name in models_to_try:
         try:
+            print(f"Trying model: {model_name}")
             model = genai.GenerativeModel(model_name)
             response = model.generate_content(prompt)
             if response.text:
+                print(f"✅ Successfully used model: {model_name}")
                 break
         except Exception as e:
+            print(f"❌ Error with {model_name}: {e}")
             continue
     if response and response.text:
         output = response.text.strip()
+        print(f"\n--- RAW AI RESPONSE FOR CHUNK {chunk_number} ---")
+        print(output)
+        print("--- END RAW RESPONSE ---\n")
         lines = [line.strip() for line in output.splitlines() if line.strip()]
+        print(f"Total non-empty lines in response: {len(lines)}")
+        for idx, line in enumerate(lines):
+            print(f"Processing line {idx + 1}: {line[:100]}...")
             # Skip any header lines that might still appear
             if ('Question' in line and 'OptionA' in line and 'OptionB' in line) or line.startswith('Question,'):
+                print(f"❌ Skipped header line: {line[:50]}...")
                 continue
             # Skip empty lines or lines that don't look like MCQs
             if not line or line.count(',') < 5:
+                print(f"❌ Skipped invalid line (comma count: {line.count(',')}): {line[:50]}...")
                 continue
             # Parse CSV line using proper CSV parsing
             try:
                 # Use StringIO to parse the line as CSV
                 csv_reader = csv.reader([line])
                 parts = next(csv_reader)
+                print(f"Parsed parts: {len(parts)} fields")
                 # Ensure we have exactly 6 parts and the question is not empty
                 if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
                     # Validate that correct answer is A, B, C, or D
                     if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
                         mcq_data.append(cleaned_parts)
+                        print(f"✅ Added MCQ: {cleaned_parts[0][:50]}... (Answer: {cleaned_parts[5]})")
+                    else:
+                        print(f"❌ Invalid answer format: {cleaned_parts[5]}")
+                else:
+                    print(f"❌ Invalid parts count or empty question. Parts: {len(parts)}, First part: '{parts[0] if parts else 'N/A'}'")
+            except csv.Error as e:
+                print(f"❌ CSV parsing error: {e}")
                 # Fallback to simple split if CSV parsing fails
                 parts = line.split(',')
                 if len(parts) >= 6 and parts[0].strip() and not parts[0].lower().startswith('question'):
                     cleaned_parts = [part.strip().strip('"') for part in parts[:6]]
                     if cleaned_parts[5].upper() in ['A', 'B', 'C', 'D']:
                         mcq_data.append(cleaned_parts)
+                        print(f"✅ Added MCQ (fallback): {cleaned_parts[0][:50]}...")
+                    else:
+                        print(f"❌ Invalid answer format (fallback): {cleaned_parts[5]}")
+    else:
+        print(f"❌ No response received for chunk {chunk_number}")
+    print(f"Generated {len(mcq_data)} MCQs from chunk {chunk_number}")
     return mcq_data
 def process_pdf_to_mcqs(pdf_file, api_key: str, chunk_size: int = 1500, progress=gr.Progress()) -> Tuple[str, str]: