Spaces:

mgbam
/

AuditXCodeInsights

Sleeping

App Files Files Community

mgbam committed on Apr 7, 2025

Commit

f25bc7a

verified ·

1 Parent(s): f8f2363

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -34

app.py CHANGED Viewed

@@ -3,17 +3,17 @@ import google.generativeai as genai
 import zipfile
 import io
 import json
-import os  # Still needed for API key potentially, but not model names
 from pathlib import Path
 import time
 import plotly.express as px
 import pandas as pd
 # --- Configuration ---
-MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Keep this estimate
 RESULTS_PAGE_SIZE = 25
-AVAILABLE_ANALYSES = {  # Keep analyses config
     "generate_docs": "Generate Missing Docstrings/Comments",
     "find_bugs": "Identify Potential Bugs & Anti-patterns",
     "check_style": "Check Style Guide Compliance (General)",
@@ -35,9 +35,9 @@ if 'error_message' not in st.session_state:
 if 'analysis_requested' not in st.session_state:
     st.session_state.analysis_requested = False
 if 'selected_model_name' not in st.session_state:
-    st.session_state.selected_model_name = None  # Will hold the "models/..." name
 if 'available_models_dict' not in st.session_state:
-    st.session_state.available_models_dict = {}  # Mapping display_name -> name
 # --- Gemini API Setup & Model Discovery ---
 model = None  # Global variable for the initialized model instance
@@ -97,7 +97,7 @@ def estimate_token_count(text):
     """
     Estimates the token count.
     If a string is provided, calculates based on its length.
-    If an integer (e.g. total char count) is provided, uses that directly.
     """
     if isinstance(text, int):
         return text // 3
@@ -164,18 +164,22 @@ def process_zip_file_cached(file_id, file_size, file_content_bytes):
 def construct_analysis_prompt(code_files_dict, requested_analyses):
     """
-    Constructs the prompt for analysis by including code files and a JSON structure for output.
     Returns the full prompt and a list of included files.
     """
-    prompt_parts = ["Analyze the following codebase...\n\n"]
-    current_token_estimate = estimate_token_count(prompt_parts[0])
     included_files = []
     code_segments = []
-    prompt_status = st.empty()
-    if len(code_files_dict) > 50:
-        prompt_status.info("Constructing prompt...")
     for filename, content in code_files_dict.items():
         segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
         segment_token_estimate = estimate_token_count(segment)
@@ -184,32 +188,47 @@ def construct_analysis_prompt(code_files_dict, requested_analyses):
             current_token_estimate += segment_token_estimate
             included_files.append(filename)
         else:
-            st.warning(f"⚠️ Codebase may exceed context limit. Analyzed first {len(included_files)} files (~{current_token_estimate:,} tokens).")
             break
-    prompt_status.empty()
     if not included_files:
         st.error("🚨 No code files included in prompt.")
         return None, []
     prompt_parts.append("".join(code_segments))
-    json_structure_description = "{\n"
-    structure_parts = []
     if "generate_docs" in requested_analyses:
-        structure_parts.append('    "documentation_suggestions": [...]')
     if "find_bugs" in requested_analyses:
-        structure_parts.append('    "potential_bugs": [...]')
     if "check_style" in requested_analyses:
-        structure_parts.append('    "style_issues": [...]')
     if "summarize_modules" in requested_analyses:
-        structure_parts.append('    "module_summaries": [...]')
     if "suggest_refactoring" in requested_analyses:
-        structure_parts.append('    "refactoring_suggestions": [...]')
-    json_structure_description += ",\n".join(structure_parts) + "\n}"
-    prompt_footer = f"\n**Analysis Task:**...\n**Output Format:**...\n{json_structure_description}\n**JSON Output Only:**\n"
-    prompt_parts.append(prompt_footer)
     full_prompt = "".join(prompt_parts)
     return full_prompt, included_files
@@ -307,7 +326,7 @@ def call_gemini_api(prompt):
 def display_results(results_json, requested_analyses):
     """
-    Displays the analysis results with pagination and allows JSON download.
     """
     st.header("📊 Analysis Report")
     if not isinstance(results_json, dict):
@@ -416,7 +435,6 @@ with st.sidebar:
         value=st.session_state.mock_api_call,
         help="Use fake data instead of calling Gemini API."
     )
     st.divider()
     st.header("♊ Select Model")
     if not st.session_state.mock_api_call:
@@ -451,11 +469,10 @@ with st.sidebar:
     else:
         st.info("Mock API Mode ACTIVE")
         st.session_state.selected_model_name = "mock_model"
     st.divider()
     st.header("🔎 Select Analyses")
     selected_analyses = [
-        key for key, name in AVAILABLE_ANALYSES.items()
         if st.checkbox(name, value=True, key=f"cb_{key}")
     ]
     st.divider()
@@ -524,7 +541,7 @@ if uploaded_file:
             analyze_button_label = "Select Model First"
         elif analyze_button_disabled:
             analyze_button_label = "Select Analyses or Upload Valid Code"
         if analysis_button_placeholder.button(
             analyze_button_label,
             type="primary",
@@ -542,8 +559,8 @@ if uploaded_file:
             else:
                 with results_placeholder:
                     spinner_model_name = (
-                        st.session_state.selected_model_name
-                        if not st.session_state.mock_api_call
                         else "Mock Mode"
                     )
                     spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."

 import zipfile
 import io
 import json
+import os  # For API key usage
 from pathlib import Path
 import time
 import plotly.express as px
 import pandas as pd
 # --- Configuration ---
+MAX_PROMPT_TOKENS_ESTIMATE = 800000  # Estimated token limit for the prompt
 RESULTS_PAGE_SIZE = 25
+AVAILABLE_ANALYSES = {
     "generate_docs": "Generate Missing Docstrings/Comments",
     "find_bugs": "Identify Potential Bugs & Anti-patterns",
     "check_style": "Check Style Guide Compliance (General)",
 if 'analysis_requested' not in st.session_state:
     st.session_state.analysis_requested = False
 if 'selected_model_name' not in st.session_state:
+    st.session_state.selected_model_name = None  # Holds internal model name
 if 'available_models_dict' not in st.session_state:
+    st.session_state.available_models_dict = {}  # Mapping: display_name -> internal name
 # --- Gemini API Setup & Model Discovery ---
 model = None  # Global variable for the initialized model instance
     """
     Estimates the token count.
     If a string is provided, calculates based on its length.
+    If an integer (total char count) is provided, uses that directly.
     """
     if isinstance(text, int):
         return text // 3
 def construct_analysis_prompt(code_files_dict, requested_analyses):
     """
+    Constructs the prompt for analysis by including code files and structured instructions.
+    The prompt now requests detailed feedback, including line references, severity, and recommended fixes.
     Returns the full prompt and a list of included files.
     """
+    prompt_parts = [
+        "You are a highly skilled code auditor. Analyze the following codebase in detail.\n",
+        "For each issue, provide:\n",
+        "  - A short summary with line references (or approximate line references).\n",
+        "  - A severity level (Low, Medium, High).\n",
+        "  - A recommended fix or code snippet if applicable.\n\n",
+        "Here is the code:\n\n"
+    ]
+    current_token_estimate = estimate_token_count("".join(prompt_parts))
     included_files = []
     code_segments = []
     for filename, content in code_files_dict.items():
         segment = f"--- START FILE: {filename} ---\n{content}\n--- END FILE: {filename} ---\n\n"
         segment_token_estimate = estimate_token_count(segment)
             current_token_estimate += segment_token_estimate
             included_files.append(filename)
         else:
+            st.warning(f"⚠️ Exceeded context limit after {len(included_files)} files.")
             break
     if not included_files:
         st.error("🚨 No code files included in prompt.")
         return None, []
     prompt_parts.append("".join(code_segments))
+    prompt_parts.append("\n\nYour tasks are:\n")
     if "generate_docs" in requested_analyses:
+        prompt_parts.append(
+            "1) Generate missing docstrings/comments using PEP 257 style. Provide recommended text and line references.\n"
+        )
     if "find_bugs" in requested_analyses:
+        prompt_parts.append(
+            "2) Identify potential bugs & anti-patterns. For each, include severity, line references, and a recommended fix.\n"
+        )
     if "check_style" in requested_analyses:
+        prompt_parts.append(
+            "3) Check style guide compliance (PEP 8 or similar). Include line references, severity, and suggested changes.\n"
+        )
     if "summarize_modules" in requested_analyses:
+        prompt_parts.append(
+            "4) Summarize each module/file by describing its primary responsibilities.\n"
+        )
     if "suggest_refactoring" in requested_analyses:
+        prompt_parts.append(
+            "5) Suggest refactoring opportunities with code snippets and justification, including line references.\n"
+        )
+    prompt_parts.append(
+        "\nFormat your response in valid JSON with the following structure:\n"
+        "{\n"
+        "   \"documentation_suggestions\": [ {\"file\": \"...\", \"line\": \"...\", \"summary\": \"...\", \"severity\": \"Low|Medium|High\", \"suggestion\": \"...\"}, ... ],\n"
+        "   \"potential_bugs\": [ {\"file\": \"...\", \"line\": \"...\", \"summary\": \"...\", \"severity\": \"Low|Medium|High\", \"suggestion\": \"...\"}, ... ],\n"
+        "   \"style_issues\": [ ... ],\n"
+        "   \"module_summaries\": [ {\"file\": \"...\", \"summary\": \"...\"}, ... ],\n"
+        "   \"refactoring_suggestions\": [ {\"file\": \"...\", \"line\": \"...\", \"area\": \"...\", \"summary\": \"...\", \"suggestion\": \"...\"}, ... ]\n"
+        "}\n"
+        "Only output valid JSON (no markdown formatting)!\n"
+    )
     full_prompt = "".join(prompt_parts)
     return full_prompt, included_files
 def display_results(results_json, requested_analyses):
     """
+    Displays the analysis results with pagination and a JSON download option.
     """
     st.header("📊 Analysis Report")
     if not isinstance(results_json, dict):
         value=st.session_state.mock_api_call,
         help="Use fake data instead of calling Gemini API."
     )
     st.divider()
     st.header("♊ Select Model")
     if not st.session_state.mock_api_call:
     else:
         st.info("Mock API Mode ACTIVE")
         st.session_state.selected_model_name = "mock_model"
     st.divider()
     st.header("🔎 Select Analyses")
     selected_analyses = [
+        key for key, name in AVAILABLE_ANALYSES.items()
         if st.checkbox(name, value=True, key=f"cb_{key}")
     ]
     st.divider()
             analyze_button_label = "Select Model First"
         elif analyze_button_disabled:
             analyze_button_label = "Select Analyses or Upload Valid Code"
         if analysis_button_placeholder.button(
             analyze_button_label,
             type="primary",
             else:
                 with results_placeholder:
                     spinner_model_name = (
+                        st.session_state.selected_model_name
+                        if not st.session_state.mock_api_call
                         else "Mock Mode"
                     )
                     spinner_msg = f"🚀 Preparing prompt & contacting AI ({spinner_model_name})... Please wait."