AIcodemetadata13

Sleeping

App Files Files Community

Bhanumani12 committed on Aug 20, 2025

Commit

692050b

verified ·

1 Parent(s): 34f2d02

Update app.py

Browse files

Files changed (1) hide show

app.py +209 -87

app.py CHANGED Viewed

@@ -1,10 +1,14 @@
 import os
-import gradio as gr
 from datetime import datetime
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 from simple_salesforce import Salesforce, SalesforceLogin
 from dotenv import load_dotenv
-import xml.etree.ElementTree as ET
 # ---------- Load Environment Variables ----------
 load_dotenv()
@@ -12,10 +16,47 @@ SF_USERNAME = os.getenv("SF_USERNAME")
 SF_PASSWORD = os.getenv("SF_PASSWORD")
 SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
 # ---------- Logging ----------
 def log_to_console(data, log_type):
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-    print(f"[{timestamp}] {log_type}: {data}")
 # ---------- Salesforce Connection ----------
 try:
@@ -30,60 +71,87 @@ except Exception as e:
     sf = None
     print(f"❌ Failed to connect to Salesforce: {e}")
-# ---------- Load Hugging Face Models ----------
-print("⏳ Loading Hugging Face models...")
-# CodeBERT for code analysis
-codebert_tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
-codebert_model = AutoModelForSequenceClassification.from_pretrained(
-    "microsoft/codebert-base", num_labels=4
-)
-codebert_pipeline = pipeline("text-classification", model=codebert_model, tokenizer=codebert_tokenizer)
-# Flan-T5 for Q&A / metadata
-qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-large")
-print("✅ Models loaded")
-# ---------- Label Mapping ----------
-label_to_issue_type = {
-    0: "Performance",
-    1: "Error",
-    2: "Security",
-    3: "Best Practice"
-}
-suggestions = {
-    "Performance": "Optimize loops and reduce SOQL queries inside loops.",
-    "Error": "Add error handling (try-catch) and null checks for safer execution.",
-    "Security": "Avoid dynamic SOQL; use bind variables to prevent SOQL injection.",
-    "Best Practice": "Refactor for readability, use bulk-safe patterns."
-}
-severities = {
-    "Performance": "Medium",
-    "Error": "High",
-    "Security": "High",
-    "Best Practice": "Low"
-}
 # ---------- Code Analyzer ----------
 def analyze_code(code):
-    if not code.strip():
         return "No code provided.", "", ""
-    result = codebert_pipeline(code[:512])  # truncate to prevent model overload
-    label_id = int(result[0]["label"].replace("LABEL_", "")) if "LABEL_" in result[0]["label"] else 0
-    issue_type = label_to_issue_type[label_id]
-    suggestion = suggestions[issue_type]
-    severity = severities[issue_type]
     review_data = {
         "Name": f"Review_{issue_type}",
         "CodeSnippet__c": code,
         "IssueType__c": issue_type,
-        "Suggestion__c": suggestion,
         "Severity__c": severity
     }
@@ -94,34 +162,75 @@ def analyze_code(code):
             result = sf.CodeReviewResult__c.create(review_data)
             if result.get("success"):
                 log_to_console({"Salesforce Record ID": result["id"]}, "Salesforce Create")
         except Exception as e:
             log_to_console({"Salesforce Exception": str(e)}, "Salesforce Error")
-    return issue_type, suggestion, severity
 # ---------- Metadata Validator ----------
 def validate_metadata(metadata, admin_id=None):
-    if not metadata.strip():
         return "No metadata provided.", "", ""
-    mtype, issue, recommendation = "Field", "", ""
     try:
         root = ET.fromstring(metadata)
-        description_found = any(elem.tag.endswith('description') for elem in root)
-        if not description_found:
-            issue = "Missing description"
-            recommendation = "Add a <description> tag to improve clarity."
         else:
-            # Use AI for deeper suggestions
-            response = qa_pipeline(
-                f"Review this Salesforce metadata and suggest improvements:\n{metadata}",
-                max_new_tokens=60
-            )
-            issue = "Potential optimization"
-            recommendation = response[0]["generated_text"].strip()
     except Exception as e:
         issue = "Invalid XML"
         recommendation = f"Could not parse metadata XML. Error: {str(e)}"
@@ -132,6 +241,7 @@ def validate_metadata(metadata, admin_id=None):
         "Recommendation__c": recommendation,
         "Status__c": "Open"
     }
     if admin_id:
         log_data["Admin__c"] = admin_id
@@ -141,54 +251,64 @@ def validate_metadata(metadata, admin_id=None):
         try:
             result = sf.MetadataAuditLog__c.create(log_data)
             if result.get("success"):
-                log_to_console({"Salesforce MetadataAuditLog ID": result["id"]}, "Salesforce Create")
         except Exception as e:
             log_to_console({"Salesforce Exception": str(e)}, "Salesforce Error")
     return mtype, issue, recommendation
-# ---------- Salesforce Chatbot ----------
 conversation_history = []
 def salesforce_chatbot(query, history=[]):
     global conversation_history
-    if not query.strip():
         return "Please provide a valid Salesforce-related question."
     salesforce_keywords = [
-        "apex", "soql", "trigger", "lwc", "visualforce", "salesforce",
-        "governor limits", "dml", "metadata", "batch apex", "queueable"
     ]
-    if not any(k in query.lower() for k in salesforce_keywords):
         return "Please ask a Salesforce-related question."
     history_summary = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in conversation_history[-4:]])
-    prompt = f"""
-You are a certified Salesforce architect. Always provide accurate, production-safe answers
-with examples, governor limits, and Trailhead references when possible.
-Conversation history:
-{history_summary}
-User: {query}
-Assistant:
-"""
     try:
-        result = qa_pipeline(prompt, max_new_tokens=256, do_sample=False, temperature=0.1)
-        output = result[0]["generated_text"].strip()
-        conversation_history.append((query, output))
         conversation_history = conversation_history[-6:]
-        return output
     except Exception as e:
-        return f"⚠️ Error: {str(e)}"
 # ---------- Gradio UI ----------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🤖 Salesforce AI Code Review & Metadata Auditor")
     with gr.Tab("Code Review"):
-        code_input = gr.Textbox(label="Apex / LWC Code", lines=8)
         issue_type = gr.Textbox(label="Issue Type")
         suggestion = gr.Textbox(label="AI Suggestion")
         severity = gr.Textbox(label="Severity")
@@ -196,7 +316,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         code_button.click(analyze_code, inputs=code_input, outputs=[issue_type, suggestion, severity])
     with gr.Tab("Metadata Validation"):
-        metadata_input = gr.Textbox(label="Metadata XML", lines=8)
         mtype = gr.Textbox(label="Type")
         issue = gr.Textbox(label="Issue")
         recommendation = gr.Textbox(label="Recommendation")
@@ -205,13 +325,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Tab("Salesforce Chatbot"):
         chatbot_output = gr.Chatbot(label="Conversation History", height=400)
-        query_input = gr.Textbox(label="Your Question", placeholder="e.g., How many DML operations are allowed?")
         with gr.Row():
             chatbot_button = gr.Button("Ask")
             clear_button = gr.Button("Clear Chat")
         chat_state = gr.State(value=[])
         def update_chatbot(query, chat_history):
             response = salesforce_chatbot(query, chat_history)
             chat_history.append((query, response))
             return chat_history, ""

 import os
+import re
+import json
+import random
+import xml.etree.ElementTree as ET
 from datetime import datetime
+import gradio as gr
+from transformers import pipeline
 from simple_salesforce import Salesforce, SalesforceLogin
 from dotenv import load_dotenv
 # ---------- Load Environment Variables ----------
 load_dotenv()
 SF_PASSWORD = os.getenv("SF_PASSWORD")
 SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
+# ---------- Label Mapping (kept; now used as fallback) ----------
+# Raw classifier label -> issue category. analyze_code_rules() looks the
+# predicted label up here and falls back to "Best Practice" when it is absent.
+label_to_issue_type = {
+    "LABEL_0": "Performance",
+    "LABEL_1": "Error",
+    "LABEL_2": "Security",
+    "LABEL_3": "Best Practice"
+}
+# Generic advice per category. pick_primary() returns it when there are no
+# findings, or when the merged finding text is empty.
+suggestions = {
+    "Performance": "Consider optimizing loops and database access. Use collections to reduce SOQL/DML calls, avoid SOQL/DML inside loops, and add selective WHERE clauses.",
+    "Error": "Add proper error handling and null checks. Wrap DML in try/catch and use Database methods for partial success.",
+    "Security": "Avoid dynamic SOQL. Use bind variables, withSharing, and field-level security checks where applicable.",
+    "Best Practice": "Refactor for readability and bulk-safety (Batchable/Queueable where needed). Limit fields and records in queries."
+}
+# Severity string per category; pick_primary() returns it as the third element.
+severities = {
+    "Performance": "Medium",
+    "Error": "High",
+    "Security": "High",
+    "Best Practice": "Low"
+}
+# ---------- Hugging Face Models (Hugging Face only, per BRD/SDD) ----------
+# Both pipelines are optional: each name is bound to None first and only
+# replaced when loading succeeds, so callers can test it for truthiness.
+# Lightweight BLOOMZ for natural language support
+nlp_pipeline = None
+try:
+    nlp_pipeline = pipeline(task="text-generation", model="bigscience/bloomz-560m")
+except Exception as e:
+    print(f"⚠️ Could not load BLOOMZ model: {e}")
+# Optional: simple classifier (kept minimal; not strictly required)
+# NOTE(review): this loads a base checkpoint; confirm it ships a trained
+# classification head before trusting its labels.
+clf_pipeline = None
+try:
+    clf_pipeline = pipeline(task="text-classification", model="microsoft/codebert-base")
+except Exception as e:
+    print(f"⚠️ Could not load CodeBERT classifier: {e}")
 # ---------- Logging ----------
 def log_to_console(data, log_type):
+    """Print ``data`` to stdout, prefixed with the current time and ``log_type``."""
+    now = datetime.now()
+    timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
+    print(f"[{timestamp}] {log_type} Log: {data}")
 # ---------- Salesforce Connection ----------
 try:
     sf = None
     print(f"❌ Failed to connect to Salesforce: {e}")
+# ---------- Heuristic Rules for Apex/LWC ----------
+# All patterns are case-insensitive and purely textual; none of them parses
+# the code, so matches inside comments or strings count as well.
+# A Database.query call, or a SELECT ... FROM span (may cross line breaks).
+SOQL_PATTERN = re.compile(r"\b(?:Database\.query|SELECT\s+[\s\S]+?FROM\b)", re.IGNORECASE)
+# Any DML keyword as a whole word.
+DML_PATTERN = re.compile(r"\b(insert|update|upsert|delete|undelete|merge)\b", re.IGNORECASE)
+# The opening "for (" or "while (" of a loop.
+LOOP_PATTERN = re.compile(r"\b(for\s*\(|while\s*\()", re.IGNORECASE)
+# A System.debug( call.
+DEBUG_PATTERN = re.compile(r"\bSystem\.debug\s*\(", re.IGNORECASE)
+# A quoted SELECT ... FROM on a single line, followed by a "+" concatenation.
+DYNAMIC_SOQL_PATTERN = re.compile(r"['\"].*SELECT.*FROM.*['\"]\s*\+\s*", re.IGNORECASE)
+# The literal text "SELECT * FROM".
+UNBOUNDED_QUERY_PATTERN = re.compile(r"SELECT\s+\*\s+FROM", re.IGNORECASE)  # LWC/JS cases
+# Any "<word>.<word>(" call; analyze_code_rules() only counts the matches.
+NULL_GUARD_PATTERN = re.compile(r"\b(\w+)\.(\w+)\(", re.IGNORECASE)  # very rough
+def analyze_code_rules(code: str):
+    """Run the regex heuristics, plus the optional classifier, over ``code``.
+
+    Args:
+        code: Source text to scan.
+
+    Returns:
+        A list of ``(issue_type, message)`` tuples in detection order, with
+        repeated messages removed. Empty when nothing matched and no
+        classifier hint was produced.
+    """
+    issues = []
+    # SOQL/DML inside loops
+    # NOTE(review): the loop body is approximated by the 400 characters that
+    # start at the loop keyword, so the loop header and any code following a
+    # short loop are scanned as well.
+    for loop in LOOP_PATTERN.finditer(code):
+        loop_block = code[loop.start(): loop.start() + 400]  # shallow lookahead
+        if SOQL_PATTERN.search(loop_block):
+            issues.append(("Performance", "SOQL query inside a loop detected. Move query outside the loop or use collections."))
+        if DML_PATTERN.search(loop_block):
+            issues.append(("Performance", "DML operation inside a loop detected. Bulkify by collecting records and performing DML once."))
+    # Dynamic SOQL
+    if DYNAMIC_SOQL_PATTERN.search(code):
+        issues.append(("Security", "Dynamic SOQL concatenation detected. Use bind variables to prevent injection."))
+    # Excessive debug statements
+    dbg_count = len(DEBUG_PATTERN.findall(code))
+    if dbg_count > 2:
+        issues.append(("Best Practice", f"Found {dbg_count} System.debug statements. Remove or gate them for production."))
+    # Unbounded queries (JS/LWC anti-patterns)
+    if UNBOUNDED_QUERY_PATTERN.search(code):
+        issues.append(("Performance", "Unbounded SELECT * detected. Query only required fields."))
+    # (Very) rough null guard hint
+    # Suggest using null-checks where chained dereferences are visible
+    dot_calls = len(NULL_GUARD_PATTERN.findall(code))
+    if dot_calls > 15:
+        issues.append(("Error", "Multiple chained calls detected. Ensure null checks and guard clauses to avoid NullPointerExceptions."))
+    # If classifier is available, add its hint as a final tag
+    if clf_pipeline:
+        try:
+            # truncation=True keeps token-dense input within the model's
+            # maximum sequence length instead of raising.
+            pred = clf_pipeline(code[:1000], truncation=True)[0]  # keep it small
+            mapped = label_to_issue_type.get(pred.get("label"), "Best Practice")
+            issues.append((mapped, f"Model hint: {mapped} issue likely. Confidence ~{pred.get('score', 0):.2f}"))
+        except Exception as e:
+            # The hint is best-effort: record the failure rather than hiding
+            # it, and keep the rule-based findings.
+            log_to_console({"Classifier Exception": str(e)}, "Classifier Error")
+    # Deduplicate by message, keeping the first occurrence and its order
+    unique = {}
+    for issue_type, message in issues:
+        unique.setdefault(message, (issue_type, message))
+    return list(unique.values())
+def pick_primary(issues):
+    """Reduce a list of findings to one ``(type, suggestion, severity)`` triple.
+
+    The highest-ranked type wins (Security/Error > Performance > Best
+    Practice); ties keep their original order. The suggestion joins the
+    messages of the top three findings. With no findings, the generic
+    "Best Practice" entry is returned.
+    """
+    if not issues:
+        fallback = "Best Practice"
+        return (fallback, suggestions[fallback], severities[fallback])
+    rank = {"Security": 3, "Error": 3, "Performance": 2, "Best Practice": 1}
+    # Negated key + stable sort: highest rank first, ties in input order.
+    ranked = sorted(issues, key=lambda issue: -rank.get(issue[0], 0))
+    primary_type = ranked[0][0]
+    summary = "; ".join(message for _, message in ranked[:3])
+    if not summary:
+        summary = suggestions[primary_type]
+    return (primary_type, summary, severities[primary_type])
 # ---------- Code Analyzer ----------
 def analyze_code(code):
+    if not code or not code.strip():
         return "No code provided.", "", ""
+    issues = analyze_code_rules(code)
+    issue_type, suggestion_text, severity = pick_primary(issues)
     review_data = {
         "Name": f"Review_{issue_type}",
         "CodeSnippet__c": code,
         "IssueType__c": issue_type,
+        "Suggestion__c": suggestion_text,
         "Severity__c": severity
     }
             result = sf.CodeReviewResult__c.create(review_data)
             if result.get("success"):
                 log_to_console({"Salesforce Record ID": result["id"]}, "Salesforce Create")
+            else:
+                log_to_console(result, "Salesforce Error")
         except Exception as e:
             log_to_console({"Salesforce Exception": str(e)}, "Salesforce Error")
+    else:
+        log_to_console("Salesforce not connected.", "Salesforce Error")
+    return issue_type, suggestion_text, severity
 # ---------- Metadata Validator ----------
 def validate_metadata(metadata, admin_id=None):
+    if not metadata or not metadata.strip():
         return "No metadata provided.", "", ""
+    mtype = "Object"
+    issue = "No issues detected."
+    recommendation = "Looks good."
     try:
         root = ET.fromstring(metadata)
+        # 1) Description present?
+        has_description = any(elem.tag.lower().endswith('description') and (elem.text or '').strip() for elem in root.iter())
+        # 2) Duplicate <fullName> or field names?
+        names = []
+        duplicates = set()
+        for elem in root.iter():
+            tag = elem.tag.lower()
+            if tag.endswith('fullname') or tag.endswith('name'):
+                if elem.text:
+                    val = elem.text.strip()
+                    if val in names:
+                        duplicates.add(val)
+                    names.append(val)
+        # 3) Fields missing helpText/description
+        missing_help = []
+        for f in root.iter():
+            if f.tag.lower().endswith('fields'):
+                # look for nested field fullName
+                fname = None
+                fdesc = None
+                fhelp = None
+                for ch in f:
+                    t = ch.tag.lower()
+                    if t.endswith('fullname') and ch.text:
+                        fname = ch.text.strip()
+                    if t.endswith('description') and ch.text:
+                        fdesc = ch.text.strip()
+                    if t.endswith('helptext') and ch.text:
+                        fhelp = ch.text.strip()
+                if fname and not (fdesc or fhelp):
+                    missing_help.append(fname)
+        problems = []
+        if not has_description:
+            problems.append("Missing <description> on the object/metadata.")
+        if duplicates:
+            problems.append(f"Duplicate names detected: {', '.join(sorted(list(duplicates)))}.")
+        if missing_help:
+            problems.append(f"Fields missing description/helpText: {', '.join(missing_help[:10])}" + ("..." if len(missing_help) > 10 else ""))
+        if problems:
+            issue = " | ".join(problems)
+            recommendation = "Add descriptions/helpText; remove duplicates; follow naming standards."
         else:
+            issue = "No high-severity issues detected."
+            recommendation = "Consider adding descriptions and reviewing picklists for inactive values."
     except Exception as e:
+        mtype = "Unknown"
         issue = "Invalid XML"
         recommendation = f"Could not parse metadata XML. Error: {str(e)}"
         "Recommendation__c": recommendation,
         "Status__c": "Open"
     }
     if admin_id:
         log_data["Admin__c"] = admin_id
         try:
             result = sf.MetadataAuditLog__c.create(log_data)
             if result.get("success"):
+                log_to_console({"Salesforce MetadataAuditLog Record ID": result["id"]}, "Salesforce Create")
+            else:
+                log_to_console(result, "Salesforce Metadata Error")
         except Exception as e:
             log_to_console({"Salesforce Exception": str(e)}, "Salesforce Error")
+    else:
+        log_to_console("Salesforce not connected.", "Salesforce Error")
     return mtype, issue, recommendation
+# ---------- Salesforce Chatbot (BLOOMZ) ----------
 conversation_history = []
 def salesforce_chatbot(query, history=[]):
+    """Answer a Salesforce question with the BLOOMZ pipeline when it is loaded.
+
+    Args:
+        query: The user's question. Blank input, or a question containing none
+            of the Salesforce keywords, gets a canned reply instead.
+        history: Accepted for interface compatibility; this function never
+            reads it. Context comes from the module-level
+            ``conversation_history`` (its last four exchanges).
+
+    Returns:
+        The reply text, or an error message string if generation raised.
+    """
     global conversation_history
+    if not query or not query.strip():
         return "Please provide a valid Salesforce-related question."
     salesforce_keywords = [
+        "apex", "soql", "trigger", "lwc", "aura", "visualforce", "salesforce", "governor limits",
+        "dml", "metadata", "batch apex", "queueable", "future method", "api", "sfdc", "heap", "limits"
     ]
+    if not any(keyword.lower() in query.lower() for keyword in salesforce_keywords):
         return "Please ask a Salesforce-related question."
     history_summary = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in conversation_history[-4:]])
+    system_prompt = (
+        "You are a certified Salesforce developer and architect. Answer with correct, production-safe guidance. "
+        "When relevant, mention governor limits (e.g., 100 SOQL queries per transaction, 150 DML statements). "
+        "Use bullets or code snippets. Prefer bulk-safe patterns and official docs."
+    )
+    prompt = f"{system_prompt}\n\nConversation History:\n{history_summary}\n\nUser: {query.strip()}\nAssistant:"
     try:
+        if nlp_pipeline:
+            # return_full_text=False: by default generated_text begins with the
+            # whole prompt, so the reply (and the stored history that feeds
+            # later prompts) would echo the system prompt back to the user.
+            out = nlp_pipeline(
+                prompt, max_new_tokens=220, do_sample=False, return_full_text=False
+            )[0]["generated_text"].strip()
+        else:
+            out = "Governor limits matter (e.g., 100 SOQL queries/tx, 150 DML). Use bulk patterns, selective queries, and proper error handling."
+        # Keep answer reasonable length
+        if len(out.split()) < 15:
+            out += "\n\nTip: Use Database.insert with allOrNone=false for partial success and check Limits class."
+        conversation_history.append((query, out))
         conversation_history = conversation_history[-6:]
+        log_to_console({"Question": query, "Answer": out}, "Chatbot Query")
+        return out
     except Exception as e:
+        return f"⚠️ Error generating response: {str(e)}"
 # ---------- Gradio UI ----------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🤖 Advanced Salesforce AI Code Review & Chatbot")
     with gr.Tab("Code Review"):
+        code_input = gr.Textbox(label="Apex / LWC Code", lines=8, placeholder="Enter your Apex or LWC code here")
         issue_type = gr.Textbox(label="Issue Type")
         suggestion = gr.Textbox(label="AI Suggestion")
         severity = gr.Textbox(label="Severity")
         code_button.click(analyze_code, inputs=code_input, outputs=[issue_type, suggestion, severity])
     with gr.Tab("Metadata Validation"):
+        metadata_input = gr.Textbox(label="Metadata XML", lines=8, placeholder="Enter your metadata XML here")
         mtype = gr.Textbox(label="Type")
         issue = gr.Textbox(label="Issue")
         recommendation = gr.Textbox(label="Recommendation")
     with gr.Tab("Salesforce Chatbot"):
         chatbot_output = gr.Chatbot(label="Conversation History", height=400)
+        query_input = gr.Textbox(label="Your Question", placeholder="e.g., How many DML operations are allowed in Apex?")
         with gr.Row():
             chatbot_button = gr.Button("Ask")
             clear_button = gr.Button("Clear Chat")
         chat_state = gr.State(value=[])
         def update_chatbot(query, chat_history):
+            if not query.strip():
+                return chat_history, "Please enter a valid question."
             response = salesforce_chatbot(query, chat_history)
             chat_history.append((query, response))
             return chat_history, ""