Spaces:

WIPI
/

DeceptivePatternDetector

Running on Zero

App Files Files Community

Asmit Nayak committed on May 12

Commit

72d1624

1 Parent(s): 23fbfee

Refactor Gemini analysis to improve CSV response parsing and validation

Browse files

Files changed (1) hide show

py_files/gemini_analysis.py +243 -192

py_files/gemini_analysis.py CHANGED Viewed

@@ -15,7 +15,6 @@ import gradio as gr
 try:
     from google import genai
-    from google.genai import types
     from google.genai.errors import ServerError
     GENAI_AVAILABLE = True
 except ImportError:
@@ -51,6 +50,100 @@ def check_csv_format(df: pd.DataFrame) -> str:
 # analyze_with_gemini function removed - using few_shots_generator instead
 def few_shots_generator(eval_dir='./eval', files=None, api_key=None):
     """
     Generator version of few_shots that yields notifications in real-time.
@@ -116,25 +209,19 @@ def few_shots_generator(eval_dir='./eval', files=None, api_key=None):
                 yield ('notification', f"🤖 Calling Gemini AI for pattern analysis (attempt {try_cnt})...")
                 if try_cnt == 1:
                     gr.Info("🤖 Starting Gemini analysis...")
-                print(f"[CONSOLE] Attempt {try_cnt} - Calling Gemini API...")
-                response = client.models.generate_content(
                     model='gemini-3-flash-preview',
-                    contents=data,
-                    config=types.GenerateContentConfig(
-                        system_instruction=textsi_1,
-                        temperature=1,
-                        top_p=0.1,
-                        top_k=1,
-                        max_output_tokens=24*1024,
-                        safety_settings=[
-                            types.SafetySetting(category='HARM_CATEGORY_HARASSMENT', threshold='BLOCK_NONE'),
-                            types.SafetySetting(category='HARM_CATEGORY_HATE_SPEECH', threshold='BLOCK_NONE'),
-                            types.SafetySetting(category='HARM_CATEGORY_SEXUALLY_EXPLICIT', threshold='BLOCK_NONE'),
-                            types.SafetySetting(category='HARM_CATEGORY_DANGEROUS_CONTENT', threshold='BLOCK_NONE'),
-                            types.SafetySetting(category='HARM_CATEGORY_CIVIC_INTEGRITY', threshold='BLOCK_NONE')
-                        ]
-                    )
                 )
                 yield ('notification', f"✅ Gemini API call successful! Processing results...")
                 gr.Info("✅ Gemini analysis successful!")
                 print(f"[CONSOLE] Gemini API call successful")
@@ -160,104 +247,84 @@ def few_shots_generator(eval_dir='./eval', files=None, api_key=None):
                 yield 'notification', error_msg
                 raise gr.Error(f"Gemini API error: {str(e.message)}")
-        try:
-            # Process the response
-            _f = os.path.join(f"{eval_dir}", "gemini_fs", os.path.basename(f))
-            df = pd.read_csv(StringIO(response.text.replace("```csv", '').replace("```", '').strip()), sep='|')
-            csv_with_yolo = pd.read_csv(f, index_col=0)
-            gemini_cols = df[["Deceptive Design Category", "Deceptive Design Subtype", "Reasoning"]]
-            csv_with_yolo.reset_index(inplace=True)
-            final_df = pd.concat([csv_with_yolo, gemini_cols], axis=1)
-            final_df.to_csv(_f, index=False, quoting=csv.QUOTE_ALL)
-            print(f"[CONSOLE] Results saved to: {_f}")
-            # Check if thinking is needed (if any deceptive patterns found)
-            if set(final_df['Deceptive Design Category'].tolist()) != {'non-deceptive'}:
-                yield ('notification', "🧠 Deceptive patterns detected! Running advanced thinking analysis...")
-                gr.Info("🧠 Deceptive patterns found! Running advanced analysis...")
-                print(f"[CONSOLE] Deceptive patterns found, running thinking analysis...")
-                # Use generator version of thinking
-                thinking_result = None
-                for thinking_status, thinking_data in thinking_generator(eval_dir, files=[_f], api_key=api_key):
-                    if thinking_status == 'notification':
-                        yield ('notification', thinking_data)
-                    elif thinking_status == 'result':
-                        thinking_result = thinking_data
-                        break
-                if thinking_result is not None:
-                    yield ('notification', "✅ Advanced thinking analysis completed successfully!")
-                    gr.Info("✅ Advanced analysis completed!")
-                    print(f"[CONSOLE] Thinking analysis completed, using refined results")
-                    final_df = thinking_result
-                else:
-                    yield ('notification', "⚠️ Advanced thinking analysis failed, using original results")
-                    gr.Warning("⚠️ Advanced analysis failed, using basic results")
-                    print(f"[CONSOLE] Thinking analysis failed, using original results")
-            else:
-                yield ('notification', "✅ No deceptive patterns found, analysis complete!")
-                gr.Info("✅ No deceptive patterns detected!")
-                print(f"[CONSOLE] No deceptive patterns found, skipping thinking analysis")
-            yield 'result', final_df
-            return
-        except Exception as e:
-            print(f"[CONSOLE] Error parsing with pipe separator, trying comma: {e}")
             try:
-                df = pd.read_csv(StringIO(response.text.replace("```csv", '').replace("```", '').strip()), sep=',')
-                csv_with_yolo = pd.read_csv(f, index_col=0)
-                gemini_cols = df[["Deceptive Design Category", "Deceptive Design Subtype", "Reasoning"]]
-                csv_with_yolo.reset_index(inplace=True)
-                final_df = pd.concat([csv_with_yolo, gemini_cols], axis=1)
-                final_df.to_csv(_f, index=False, quoting=csv.QUOTE_ALL)
-                print(f"[CONSOLE] Results saved to: {_f} (comma separated)")
-                # Check if thinking is needed
-                if set(final_df['Deceptive Design Category'].tolist()) != {'non-deceptive'}:
-                    yield ('notification', "🧠 Deceptive patterns detected! Running advanced thinking analysis...")
-                    gr.Info("🧠 Deceptive patterns found! Running advanced analysis...")
-                    print(f"[CONSOLE] Deceptive patterns found, running thinking analysis...")
-                    # Use generator version of thinking
-                    thinking_result = None
-                    for thinking_status, thinking_data in thinking_generator(eval_dir, files=[_f], api_key=api_key):
-                        if thinking_status == 'notification':
-                            yield ('notification', thinking_data)
-                        elif thinking_status == 'result':
-                            thinking_result = thinking_data
-                            break
-                    if thinking_result is not None:
-                        yield ('notification', "✅ Advanced thinking analysis completed successfully!")
-                        gr.Info("✅ Advanced analysis completed!")
-                        print(f"[CONSOLE] Thinking analysis completed, using refined results")
-                        final_df = thinking_result
-                    else:
-                        yield ('notification', "⚠️ Advanced thinking analysis failed, using original results")
-                        gr.Warning("⚠️ Advanced analysis failed, using basic results")
-                        print(f"[CONSOLE] Thinking analysis failed, using original results")
-                else:
-                    yield ('notification', "✅ No deceptive patterns found, analysis complete!")
-                    gr.Info("✅ No deceptive patterns detected!")
-                    print(f"[CONSOLE] No deceptive patterns found, skipping thinking analysis")
-                yield ('result', final_df)
-                return
-            except Exception as e2:
-                error_msg = f"❌ Error parsing Gemini response with both separators: {str(e2)}"
-                yield ('notification', error_msg)
-                print(f"[CONSOLE] FEW_SHOT Error with both separators: {e2}")
                 try:
-                    error_file = _f.replace(".csv", "e1.txt")
-                    with open(error_file, 'w') as _fs:
-                        _fs.write(response.text)
-                    print(f"[CONSOLE] Error response saved to: {error_file}")
                 except Exception as e3:
                     print(f"[CONSOLE] Failed to save error response: {e3}")
-                raise gr.Error(f"Failed to parse response: {str(e2)}")
     yield ('result', None)
@@ -309,31 +376,24 @@ def thinking_generator(eval_dir="./eval", files=None, api_key=None):
             # Make API call to Gemini with retry logic for thinking analysis
             try_cnt = 0
-            response = None
             while try_cnt < 2:
                 try:
                     try_cnt += 1
                     yield ('notification', f"🧠 Running advanced thinking analysis (attempt {try_cnt})...")
-                    print(f"[CONSOLE] Attempt {try_cnt} - Calling Gemini API for thinking...")
-                    response = client.models.generate_content(
                         model='gemini-3-flash-preview',
-                        contents=data,
-                        config=types.GenerateContentConfig(
-                            system_instruction=textsi_1,
-                            temperature=1,
-                            top_p=0.1,
-                            top_k=1,
-                            max_output_tokens=24*1024,
-                            thinking_config=types.ThinkingConfig(include_thoughts=True),
-                            safety_settings=[
-                                types.SafetySetting(category='HARM_CATEGORY_HARASSMENT', threshold='BLOCK_NONE'),
-                                types.SafetySetting(category='HARM_CATEGORY_HATE_SPEECH', threshold='BLOCK_NONE'),
-                                types.SafetySetting(category='HARM_CATEGORY_SEXUALLY_EXPLICIT', threshold='BLOCK_NONE'),
-                                types.SafetySetting(category='HARM_CATEGORY_DANGEROUS_CONTENT', threshold='BLOCK_NONE'),
-                                types.SafetySetting(category='HARM_CATEGORY_CIVIC_INTEGRITY', threshold='BLOCK_NONE')
-                            ]
-                        )
                     )
                     yield ('notification', f"✅ Advanced thinking analysis API call successful!")
                     print(f"[CONSOLE] Thinking API call successful")
@@ -344,7 +404,7 @@ def thinking_generator(eval_dir="./eval", files=None, api_key=None):
                         yield ('notification', error_msg)
                         print(f"[CONSOLE] Failed to get thinking response after {try_cnt} attempts")
                         raise gr.Error(f"Advanced analysis failed after {try_cnt} attempts")
                     wait_msg = f"⚠️ Server error in thinking analysis. Retrying attempt {try_cnt + 1}/2 in 60 seconds..."
                     yield ('notification', wait_msg)
                     gr.Warning(f"⚠️ Thinking server error. Retrying in 60s... (attempt {try_cnt + 1}/2)")
@@ -358,16 +418,11 @@ def thinking_generator(eval_dir="./eval", files=None, api_key=None):
                     yield ('notification', error_msg)
                     print(f"[CONSOLE] Non-server error in thinking API call: {e}")
                     raise gr.Error(f"Thinking analysis API error: {str(e)}")
-            output_csv = ""
-            thought_txt = ""
-            for part in response.candidates[0].content.parts:
-                if part.thought == True:
-                    thought_txt = part.text
-                    print(f"[CONSOLE] Extracted thought text ({len(thought_txt)} chars)")
-                else:
-                    output_csv = part.text
-                    print(f"[CONSOLE] Extracted output CSV ({len(output_csv)} chars)")
             _f = os.path.join(f"{eval_dir}", "gemini_fs", os.path.basename(f))
             _f_thought = os.path.join(f"{eval_dir}", "gemini_fs", os.path.basename(f).replace(".csv", "_thinking.txt"))
@@ -377,60 +432,56 @@ def thinking_generator(eval_dir="./eval", files=None, api_key=None):
                 _f_thought_file.write(thought_txt)
             print(f"[CONSOLE] Thinking text saved to: {_f_thought}")
-            # Parse and save updated CSV with similar process as main analysis
-            try:
-                # Parse the thinking response CSV
-                df_thinking = pd.read_csv(StringIO(output_csv), sep='|')
-                # Read the original CSV file to get the base data
-                csv_with_yolo = pd.read_csv(f, index_col=0).drop(columns=["Deceptive Design Category", "Deceptive Design Subtype", "Reasoning"], errors='ignore')
-                # Extract the thinking analysis columns (similar to main process)
-                thinking_cols = df_thinking[["Deceptive Design Category", "Deceptive Design Subtype", "Reasoning"]]
-                # Reset index and concatenate with original data
-                csv_with_yolo.reset_index(inplace=True)
-                final_df = pd.concat([csv_with_yolo, thinking_cols], axis=1)
-                # Save the updated dataframe
-                final_df.to_csv(_f, index=False, quoting=csv.QUOTE_ALL)
-                print(f"[CONSOLE] Thinking results saved to: {_f} (pipe separated)")
-                yield ('result', final_df)  # Return the updated dataframe
-                return
-            except Exception as e:
-                print(f"[CONSOLE] Error with pipe separator, trying comma: {e}")
                 try:
-                    # Parse the thinking response CSV with comma separator
-                    df_thinking = pd.read_csv(StringIO(output_csv), sep=',')
-                    # Read the original CSV file to get the base data
-                    csv_with_yolo = pd.read_csv(f, index_col=0).drop(columns=["Deceptive Design Category", "Deceptive Design Subtype", "Reasoning"], errors='ignore')
-                    # Extract the thinking analysis columns (similar to main process)
-                    thinking_cols = df_thinking[["Deceptive Design Category", "Deceptive Design Subtype", "Reasoning"]]
-                    # Reset index and concatenate with original data
-                    csv_with_yolo.reset_index(inplace=True)
-                    final_df = pd.concat([csv_with_yolo, thinking_cols], axis=1)
-                    # Save the updated dataframe
-                    final_df.to_csv(_f, index=False, quoting=csv.QUOTE_ALL)
-                    print(f"[CONSOLE] Thinking results saved to: {_f} (comma separated)")
-                    yield ('result', final_df)  # Return the updated dataframe
-                    return
-                except Exception as e2:
-                    error_msg = f"❌ Error parsing thinking analysis response with both separators: {str(e2)}"
-                    yield ('notification', error_msg)
-                    print(f"[CONSOLE] THINKING ERROR with both separators: {e2}")
                     try:
-                        error_file = _f.replace(".csv", "e2.txt")
-                        with open(error_file, 'w') as _fs:
                             _fs.write(output_csv)
-                        print(f"[CONSOLE] Thinking error response saved to: {error_file}")
                     except Exception as e3:
                         print(f"[CONSOLE] Failed to save thinking error response: {e3}")
-                    raise gr.Error(f"Failed to parse thinking response: {str(e2)}")
         except Exception as e:
             error_msg = f"❌ Error in thinking analysis: {str(e)}"
             yield ('notification', error_msg)

 try:
     from google import genai
     from google.genai.errors import ServerError
     GENAI_AVAILABLE = True
 except ImportError:
 # analyze_with_gemini function removed - using few_shots_generator instead
REQUIRED_GEMINI_COLS = ["Deceptive Design Category", "Deceptive Design Subtype", "Reasoning"]


def _parse_response_to_df(response_text, original_csv_path, drop_existing=False):
    """
    Parse a Gemini CSV response and validate it can be merged with the source CSV.

    Markdown code fences are removed, then the text is parsed with a pipe
    separator and, if that does not yield the required columns, with a comma.
    Header names and string cells of the required columns are trimmed of
    surrounding whitespace. The parsed frame must contain every column in
    REQUIRED_GEMINI_COLS, have exactly as many rows as the source CSV, and hold
    no nulls in those columns. Rows are merged by position, not by index label.

    Args:
        response_text: Raw text returned by the model. ``None`` or blank text
            is reported as a parse failure instead of raising.
        original_csv_path: Path (or anything ``pd.read_csv`` accepts) of the
            source CSV; its first column is read as the index and restored as
            a regular column in the result.
        drop_existing: When true, columns named in REQUIRED_GEMINI_COLS that
            already exist in the source CSV are dropped before merging.

    Returns:
        (final_df, None) on success, or (None, error_message) on failure.
        The error_message is user-readable and safe to send back to the LLM for correction.
    """
    if response_text is None or not response_text.strip():
        return None, "Failed to parse CSV. Response was empty."

    cleaned = response_text.replace("```csv", '').replace("```", '').strip()

    parsed = None
    sep_errors = []
    for sep in ['|', ',']:
        try:
            candidate = pd.read_csv(StringIO(cleaned), sep=sep)
        except Exception as e:
            sep_errors.append(f"separator '{sep}': {e}")
            continue
        # Pipe-separated output is frequently padded ("a | b"); match on trimmed names.
        candidate.columns = [str(c).strip() for c in candidate.columns]
        missing = [c for c in REQUIRED_GEMINI_COLS if c not in candidate.columns]
        if missing:
            sep_errors.append(f"separator '{sep}': missing required columns {missing}; got {list(candidate.columns)}")
            continue
        parsed = candidate
        break

    if parsed is None:
        return None, "Failed to parse CSV. " + " | ".join(sep_errors)

    try:
        csv_with_yolo = pd.read_csv(original_csv_path, index_col=0)
        if drop_existing:
            csv_with_yolo = csv_with_yolo.drop(columns=REQUIRED_GEMINI_COLS, errors='ignore')
    except Exception as e:
        return None, f"Could not read source CSV {original_csv_path}: {e}"

    if len(parsed) != len(csv_with_yolo):
        return None, (
            f"Row count mismatch: response has {len(parsed)} rows but input has "
            f"{len(csv_with_yolo)} rows. Output must contain exactly one row per input row."
        )

    # pandas may infer an index from the response (rows with one more field than
    # the header). Drop it so the column-wise concat below pairs rows by position
    # instead of aligning on unrelated index labels.
    gemini_cols = parsed[REQUIRED_GEMINI_COLS].reset_index(drop=True)
    for col in REQUIRED_GEMINI_COLS:
        gemini_cols[col] = gemini_cols[col].map(
            lambda value: value.strip() if isinstance(value, str) else value
        )

    if gemini_cols.isnull().any().any():
        null_rows = gemini_cols[gemini_cols.isnull().any(axis=1)].index.tolist()
        return None, f"Null values found in required columns at row indices: {null_rows[:10]}"

    csv_with_yolo = csv_with_yolo.reset_index()
    final_df = pd.concat([csv_with_yolo, gemini_cols], axis=1)
    return final_df, None
def _build_correction_request(error_message):
    """
    Compose the follow-up prompt asking the model to repair its previous output.

    The prompt embeds `error_message` verbatim and lists the column names from
    REQUIRED_GEMINI_COLS. Pair it with `previous_interaction_id=<prior interaction id>`
    so the server provides history.
    """
    required_cols = ', '.join(REQUIRED_GEMINI_COLS)
    prompt_parts = [
        "Your previous response could not be parsed into a valid DataFrame.\n",
        "Validation error:\n" + str(error_message) + "\n\n",
        "Please regenerate ONLY the corrected pipe-separated CSV output. ",
        "Maintain exactly the same number of rows as the input. Include the required columns ",
        "(" + required_cols + "). ",
        "Output only the CSV — no markdown code fences, no explanations.",
    ]
    return "".join(prompt_parts)
def _extract_model_text(interaction):
    """Concatenate text from all model_output steps in an Interactions response.

    Walks ``interaction.steps`` in order. For every step whose ``type`` is
    ``"model_output"``, the ``text`` of each content block whose ``type`` is
    ``"text"`` is collected; other steps and block types are ignored, and a
    missing or ``None`` ``text`` contributes nothing.

    Args:
        interaction: Object exposing a ``steps`` iterable. A missing or ``None``
            ``steps`` attribute is treated as "no steps" instead of raising,
            matching how every nested attribute is already read defensively.

    Returns:
        The collected text joined with no separator; ``""`` when nothing matched.
    """
    chunks = []
    for step in getattr(interaction, "steps", None) or []:
        if getattr(step, "type", None) != "model_output":
            continue
        for block in getattr(step, "content", None) or []:
            if getattr(block, "type", None) == "text":
                chunks.append(getattr(block, "text", "") or "")
    return "".join(chunks)
def _extract_thought_text(interaction):
    """Concatenate thought summary text from all thought steps in an Interactions response.

    Walks ``interaction.steps`` in order. For every step whose ``type`` is
    ``"thought"`` and whose ``summary`` is non-empty, the ``text`` of each
    summary block whose ``type`` is ``"text"`` is collected; a missing or
    ``None`` ``text`` contributes nothing.

    Args:
        interaction: Object exposing a ``steps`` iterable. A missing or ``None``
            ``steps`` attribute is treated as "no steps" instead of raising,
            matching how every nested attribute is already read defensively.

    Returns:
        The collected summary text joined with no separator; ``""`` when
        there are no thought summaries.
    """
    chunks = []
    for step in getattr(interaction, "steps", None) or []:
        if getattr(step, "type", None) != "thought":
            continue
        for block in getattr(step, "summary", None) or []:
            if getattr(block, "type", None) == "text":
                chunks.append(getattr(block, "text", "") or "")
    return "".join(chunks)
 def few_shots_generator(eval_dir='./eval', files=None, api_key=None):
     """
     Generator version of few_shots that yields notifications in real-time.
                 yield ('notification', f"🤖 Calling Gemini AI for pattern analysis (attempt {try_cnt})...")
                 if try_cnt == 1:
                     gr.Info("🤖 Starting Gemini analysis...")
+                print(f"[CONSOLE] Attempt {try_cnt} - Calling Gemini Interactions API...")
+                interaction = client.interactions.create(
                     model='gemini-3-flash-preview',
+                    input=data,
+                    system_instruction=textsi_1,
+                    generation_config={
+                        'temperature': 1,
+                        'top_p': 0.1,
+                        'max_output_tokens': 45 * 1024,
+                        'thinking_level': 'high',
+                    },
                 )
+                response_text = _extract_model_text(interaction)
                 yield ('notification', f"✅ Gemini API call successful! Processing results...")
                 gr.Info("✅ Gemini analysis successful!")
                 print(f"[CONSOLE] Gemini API call successful")
                 yield 'notification', error_msg
                 raise gr.Error(f"Gemini API error: {str(e.message)}")
+        _f = os.path.join(f"{eval_dir}", "gemini_fs", os.path.basename(f))
+        # Parse and validate; on failure, ask the model to self-correct once.
+        final_df, parse_error = _parse_response_to_df(response_text, f, drop_existing=False)
+        if final_df is None:
+            yield ('notification', f"⚠️ Output validation failed: {parse_error[:200]}. Asking Gemini to correct (1 retry)...")
+            gr.Info("⚠️ Output invalid — asking Gemini to correct")
+            print(f"[CONSOLE] FEW_SHOT parse/validation failed: {parse_error}")
             try:
+                correction_interaction = client.interactions.create(
+                    model='gemini-3-flash-preview',
+                    input=_build_correction_request(parse_error),
+                    previous_interaction_id=interaction.id,
+                    system_instruction=textsi_1,
+                    generation_config={
+                        'temperature': 1,
+                        'top_p': 0.1,
+                        'max_output_tokens': 45 * 1024,
+                        'thinking_level': 'high',
+                    },
+                )
+                interaction = correction_interaction
+                response_text = _extract_model_text(interaction)
+                final_df, parse_error = _parse_response_to_df(response_text, f, drop_existing=False)
+            except Exception as e_corr:
+                print(f"[CONSOLE] FEW_SHOT correction call failed: {e_corr}")
+                parse_error = f"Correction API call failed: {e_corr}"
+            if final_df is None:
                 try:
+                    error_file = _f.replace(".csv", "_parse_error.txt")
+                    with open(error_file, 'w', encoding='utf-8') as _fs:
+                        _fs.write(response_text)
+                    print(f"[CONSOLE] Failed response saved to: {error_file}")
                 except Exception as e3:
                     print(f"[CONSOLE] Failed to save error response: {e3}")
+                error_msg = f"❌ Gemini output could not be parsed even after correction: {parse_error}"
+                yield ('notification', error_msg)
+                raise gr.Error(f"Failed to parse response after correction: {parse_error}")
+            else:
+                yield ('notification', "✅ Gemini correction succeeded — output parsed successfully")
+                gr.Info("✅ Corrected output parsed successfully!")
+        final_df.to_csv(_f, index=False, quoting=csv.QUOTE_ALL)
+        print(f"[CONSOLE] Results saved to: {_f}")
+        # Check if thinking is needed (if any deceptive patterns found)
+        if set(final_df['Deceptive Design Category'].tolist()) != {'non-deceptive'}:
+            yield ('notification', "🧠 Deceptive patterns detected! Running advanced thinking analysis...")
+            gr.Info("🧠 Deceptive patterns found! Running advanced analysis...")
+            print(f"[CONSOLE] Deceptive patterns found, running thinking analysis...")
+            thinking_result = None
+            for thinking_status, thinking_data in thinking_generator(eval_dir, files=[_f], api_key=api_key):
+                if thinking_status == 'notification':
+                    yield ('notification', thinking_data)
+                elif thinking_status == 'result':
+                    thinking_result = thinking_data
+                    break
+            if thinking_result is not None:
+                yield ('notification', "✅ Advanced thinking analysis completed successfully!")
+                gr.Info("✅ Advanced analysis completed!")
+                print(f"[CONSOLE] Thinking analysis completed, using refined results")
+                final_df = thinking_result
+            else:
+                yield ('notification', "⚠️ Advanced thinking analysis failed, using original results")
+                gr.Warning("⚠️ Advanced analysis failed, using basic results")
+                print(f"[CONSOLE] Thinking analysis failed, using original results")
+        else:
+            yield ('notification', "✅ No deceptive patterns found, analysis complete!")
+            gr.Info("✅ No deceptive patterns detected!")
+            print(f"[CONSOLE] No deceptive patterns found, skipping thinking analysis")
+        yield ('result', final_df)
+        return
     yield ('result', None)
             # Make API call to Gemini with retry logic for thinking analysis
             try_cnt = 0
+            interaction = None
             while try_cnt < 2:
                 try:
                     try_cnt += 1
                     yield ('notification', f"🧠 Running advanced thinking analysis (attempt {try_cnt})...")
+                    print(f"[CONSOLE] Attempt {try_cnt} - Calling Gemini Interactions API for thinking...")
+                    interaction = client.interactions.create(
                         model='gemini-3-flash-preview',
+                        input=data,
+                        system_instruction=textsi_1,
+                        generation_config={
+                            'temperature': 1,
+                            'top_p': 0.1,
+                            'max_output_tokens': 45 * 1024,
+                            'thinking_level': 'high',
+                            'thinking_summaries': 'auto',
+                        },
                     )
                     yield ('notification', f"✅ Advanced thinking analysis API call successful!")
                     print(f"[CONSOLE] Thinking API call successful")
                         yield ('notification', error_msg)
                         print(f"[CONSOLE] Failed to get thinking response after {try_cnt} attempts")
                         raise gr.Error(f"Advanced analysis failed after {try_cnt} attempts")
                     wait_msg = f"⚠️ Server error in thinking analysis. Retrying attempt {try_cnt + 1}/2 in 60 seconds..."
                     yield ('notification', wait_msg)
                     gr.Warning(f"⚠️ Thinking server error. Retrying in 60s... (attempt {try_cnt + 1}/2)")
                     yield ('notification', error_msg)
                     print(f"[CONSOLE] Non-server error in thinking API call: {e}")
                     raise gr.Error(f"Thinking analysis API error: {str(e)}")
+            output_csv = _extract_model_text(interaction)
+            thought_txt = _extract_thought_text(interaction)
+            print(f"[CONSOLE] Extracted output CSV ({len(output_csv)} chars)")
+            print(f"[CONSOLE] Extracted thought text ({len(thought_txt)} chars)")
             _f = os.path.join(f"{eval_dir}", "gemini_fs", os.path.basename(f))
             _f_thought = os.path.join(f"{eval_dir}", "gemini_fs", os.path.basename(f).replace(".csv", "_thinking.txt"))
                 _f_thought_file.write(thought_txt)
             print(f"[CONSOLE] Thinking text saved to: {_f_thought}")
+            # Parse and validate; on failure, ask the model to self-correct once.
+            final_df, parse_error = _parse_response_to_df(output_csv, f, drop_existing=True)
+            if final_df is None:
+                yield ('notification', f"⚠️ Thinking output validation failed: {parse_error[:200]}. Asking Gemini to correct (1 retry)...")
+                gr.Info("⚠️ Thinking output invalid — asking Gemini to correct")
+                print(f"[CONSOLE] THINKING parse/validation failed: {parse_error}")
                 try:
+                    correction_interaction = client.interactions.create(
+                        model='gemini-3-flash-preview',
+                        input=_build_correction_request(parse_error),
+                        previous_interaction_id=interaction.id,
+                        system_instruction=textsi_1,
+                        generation_config={
+                            'temperature': 1,
+                            'top_p': 0.1,
+                            'max_output_tokens': 45 * 1024,
+                            'thinking_level': 'high',
+                            'thinking_summaries': 'auto',
+                        },
+                    )
+                    corrected_csv = _extract_model_text(correction_interaction)
+                    final_df, parse_error = _parse_response_to_df(corrected_csv, f, drop_existing=True)
+                    if final_df is not None:
+                        output_csv = corrected_csv
+                        interaction = correction_interaction
+                except Exception as e_corr:
+                    print(f"[CONSOLE] THINKING correction call failed: {e_corr}")
+                    parse_error = f"Correction API call failed: {e_corr}"
+                if final_df is None:
                     try:
+                        error_file = _f.replace(".csv", "_thinking_parse_error.txt")
+                        with open(error_file, 'w', encoding='utf-8') as _fs:
                             _fs.write(output_csv)
+                        print(f"[CONSOLE] Thinking failed response saved to: {error_file}")
                     except Exception as e3:
                         print(f"[CONSOLE] Failed to save thinking error response: {e3}")
+                    error_msg = f"❌ Thinking output could not be parsed even after correction: {parse_error}"
+                    yield ('notification', error_msg)
+                    raise gr.Error(f"Failed to parse thinking response after correction: {parse_error}")
+                else:
+                    yield ('notification', "✅ Gemini thinking correction succeeded — output parsed successfully")
+                    gr.Info("✅ Corrected thinking output parsed successfully!")
+            final_df.to_csv(_f, index=False, quoting=csv.QUOTE_ALL)
+            print(f"[CONSOLE] Thinking results saved to: {_f}")
+            yield ('result', final_df)
+            return
         except Exception as e:
             error_msg = f"❌ Error in thinking analysis: {str(e)}"
             yield ('notification', error_msg)