AIcodemetadata13

Sleeping

App Files Files Community

Bhanumani12 committed on Aug 20, 2025

Commit

6fd3baf

verified ·

1 Parent(s): 61ea1a4

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -186

app.py CHANGED Viewed

@@ -1,15 +1,11 @@
-# app.py
 import os
-import re
-import json
 import random
-import xml.etree.ElementTree as ET
-from datetime import datetime
 import gradio as gr
 from transformers import pipeline
 from simple_salesforce import Salesforce, SalesforceLogin
 from dotenv import load_dotenv
 # ---------- Load Environment Variables ----------
 load_dotenv()
@@ -17,7 +13,7 @@ SF_USERNAME = os.getenv("SF_USERNAME")
 SF_PASSWORD = os.getenv("SF_PASSWORD")
 SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
-# ---------- Label Mapping (kept for model hint mapping) ----------
 label_to_issue_type = {
     "LABEL_0": "Performance",
     "LABEL_1": "Error",
@@ -26,10 +22,10 @@ label_to_issue_type = {
 }
 suggestions = {
-    "Performance": "Consider optimizing loops and database access. Use collections to reduce SOQL/DML calls, avoid SOQL/DML inside loops, and add selective WHERE clauses.",
-    "Error": "Add proper error handling and null checks. Wrap DML in try/catch and use Database methods for partial success.",
-    "Security": "Avoid dynamic SOQL. Use bind variables, with sharing, and field-level security checks where applicable.",
-    "Best Practice": "Refactor for readability and bulk-safety (Batchable/Queueable where needed). Limit fields and records in queries."
 }
 severities = {
@@ -39,20 +35,8 @@ severities = {
     "Best Practice": "Low"
 }
-# ---------- Hugging Face Models (Hugging Face only, per BRD/SDD) ----------
-# NLP for chatbot
-try:
-    nlp_pipeline = pipeline("text-generation", model="bigscience/bloomz-560m")
-except Exception as e:
-    nlp_pipeline = None
-    print(f"⚠️ Could not load BLOOMZ model: {e}")
-# Optional classifier for a small hint in code analysis (not required)
-try:
-    clf_pipeline = pipeline("text-classification", model="microsoft/codebert-base")
-except Exception as e:
-    clf_pipeline = None
-    print(f"⚠️ Could not load CodeBERT classifier: {e}")
 # ---------- Logging ----------
 def log_to_console(data, log_type):
@@ -72,85 +56,21 @@ except Exception as e:
     sf = None
     print(f"❌ Failed to connect to Salesforce: {e}")
-# ---------- Heuristic Rules for Apex/LWC (governor, security, best-practice) ----------
-SOQL_PATTERN = re.compile(r"\b(?:Database\.query|SELECT\s+[\s\S]+?FROM\b)", re.IGNORECASE)
-DML_PATTERN = re.compile(r"\b(insert|update|upsert|delete|undelete|merge)\b", re.IGNORECASE)
-LOOP_PATTERN = re.compile(r"\b(for\s*\(|while\s*\()", re.IGNORECASE)
-DEBUG_PATTERN = re.compile(r"\bSystem\.debug\s*\(", re.IGNORECASE)
-DYNAMIC_SOQL_PATTERN = re.compile(r"['\"].*SELECT.*FROM.*['\"]\s*\+\s*", re.IGNORECASE)
-UNBOUNDED_QUERY_PATTERN = re.compile(r"SELECT\s+\*\s+FROM", re.IGNORECASE)  # JS/LWC anti-pattern
-NULL_GUARD_PATTERN = re.compile(r"\b(\w+)\.(\w+)\(", re.IGNORECASE)  # rough chained-call detector
-def analyze_code_rules(code: str):
-    issues = []
-    # SOQL/DML inside loops
-    for loop in LOOP_PATTERN.finditer(code):
-        loop_block = code[loop.start(): loop.start() + 400]  # shallow lookahead
-        if SOQL_PATTERN.search(loop_block):
-            issues.append(("Performance", "SOQL query inside a loop detected. Move query outside the loop or use collections."))
-        if DML_PATTERN.search(loop_block):
-            issues.append(("Performance", "DML operation inside a loop detected. Bulkify by collecting records and performing DML once."))
-    # Dynamic SOQL
-    if DYNAMIC_SOQL_PATTERN.search(code):
-        issues.append(("Security", "Dynamic SOQL concatenation detected. Use bind variables to prevent injection."))
-    # Excessive debug statements
-    dbg_count = len(DEBUG_PATTERN.findall(code))
-    if dbg_count > 2:
-        issues.append(("Best Practice", f"Found {dbg_count} System.debug statements. Remove or gate them for production."))
-    # Unbounded queries (JS/LWC anti-patterns)
-    if UNBOUNDED_QUERY_PATTERN.search(code):
-        issues.append(("Performance", "Unbounded SELECT * detected. Query only required fields."))
-    # (Very) rough null guard hint
-    dot_calls = len(NULL_GUARD_PATTERN.findall(code))
-    if dot_calls > 15:
-        issues.append(("Error", "Multiple chained calls detected. Ensure null checks and guard clauses to avoid NullPointerExceptions."))
-    # Optional classifier hint
-    if clf_pipeline:
-        try:
-            pred = clf_pipeline(code[:1000])[0]  # short context
-            mapped = label_to_issue_type.get(pred.get("label"), "Best Practice")
-            issues.append((mapped, f"Model hint: {mapped} issue likely. Confidence ~{pred.get('score', 0):.2f}"))
-        except Exception:
-            pass
-    # Deduplicate by message
-    seen = set()
-    deduped = []
-    for t, msg in issues:
-        if msg not in seen:
-            seen.add(msg)
-            deduped.append((t, msg))
-    return deduped
-def pick_primary(issues):
-    # Priority: Security/Error > Performance > Best Practice
-    prio = {"Security": 3, "Error": 3, "Performance": 2, "Best Practice": 1}
-    if not issues:
-        return ("Best Practice", suggestions["Best Practice"], severities["Best Practice"])
-    issues_sorted = sorted(issues, key=lambda x: prio.get(x[0], 0), reverse=True)
-    top_type = issues_sorted[0][0]
-    merged = "; ".join(msg for _, msg in issues_sorted[:3])
-    return (top_type, merged or suggestions[top_type], severities[top_type])
-# ---------- Code Analyzer (UI callback) ----------
 def analyze_code(code):
-    if not code or not code.strip():
         return "No code provided.", "", ""
-    issues = analyze_code_rules(code)
-    issue_type, suggestion_text, severity = pick_primary(issues)
     review_data = {
         "Name": f"Review_{issue_type}",
         "CodeSnippet__c": code,
         "IssueType__c": issue_type,
-        "Suggestion__c": suggestion_text,
         "Severity__c": severity
     }
@@ -168,65 +88,28 @@ def analyze_code(code):
     else:
         log_to_console("Salesforce not connected.", "Salesforce Error")
-    return issue_type, suggestion_text, severity
-# ---------- Metadata Validator (UI callback) ----------
 def validate_metadata(metadata, admin_id=None):
-    if not metadata or not metadata.strip():
         return "No metadata provided.", "", ""
-    mtype = "Object"
-    issue = "No issues detected."
-    recommendation = "Looks good."
     try:
         root = ET.fromstring(metadata)
-        # 1) Description present?
-        has_description = any(elem.tag.lower().endswith('description') and (elem.text or '').strip() for elem in root.iter())
-        # 2) Duplicate <fullName> or generic <name> values?
-        names = []
-        duplicates = set()
-        for elem in root.iter():
-            tag = elem.tag.lower()
-            if tag.endswith('fullname') or tag.endswith('name'):
-                if elem.text:
-                    val = elem.text.strip()
-                    if val in names:
-                        duplicates.add(val)
-                    names.append(val)
-        # 3) Fields missing helpText/description
-        missing_help = []
-        for f in root.iter():
-            if f.tag.lower().endswith('fields'):
-                fname, fdesc, fhelp = None, None, None
-                for ch in f:
-                    t = ch.tag.lower()
-                    if t.endswith('fullname') and ch.text:
-                        fname = ch.text.strip()
-                    if t.endswith('description') and ch.text:
-                        fdesc = ch.text.strip()
-                    if t.endswith('helptext') and ch.text:
-                        fhelp = ch.text.strip()
-                if fname and not (fdesc or fhelp):
-                    missing_help.append(fname)
-        problems = []
-        if not has_description:
-            problems.append("Missing <description> on the object/metadata.")
-        if duplicates:
-            problems.append(f"Duplicate names detected: {', '.join(sorted(list(duplicates)))}.")
-        if missing_help:
-            problems.append(f"Fields missing description/helpText: {', '.join(missing_help[:10])}" + ("..." if len(missing_help) > 10 else ""))
-        if problems:
-            issue = " | ".join(problems)
-            recommendation = "Add descriptions/helpText; remove duplicates; follow naming standards."
-        else:
-            issue = "No high-severity issues detected."
-            recommendation = "Consider adding descriptions and reviewing picklists for inactive values."
     except Exception as e:
-        mtype = "Unknown"
         issue = "Invalid XML"
         recommendation = f"Could not parse metadata XML. Error: {str(e)}"
@@ -257,68 +140,59 @@ def validate_metadata(metadata, admin_id=None):
     return mtype, issue, recommendation
-# ---------- Chatbot helpers (no hardcoded answers; model-only) ----------
-def _clean_llm_reply(generated: str) -> str:
-    """Strip prompt echoing and keep only the assistant's part."""
-    text = generated or ""
-    # Keep only content after the last 'Assistant:'
-    if "Assistant:" in text:
-        text = text.split("Assistant:")[-1]
-    # Remove any lines that start with 'User:' to avoid echo
-    lines = [line for line in text.splitlines() if not line.strip().startswith("User:")]
-    cleaned = "\n".join(lines).strip()
-    cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
-    return cleaned
 def salesforce_chatbot(query, history=[]):
     global conversation_history
-    if not query or not query.strip():
         return "Please provide a valid Salesforce-related question."
     salesforce_keywords = [
         "apex", "soql", "trigger", "lwc", "aura", "visualforce", "salesforce", "governor limits",
         "dml", "metadata", "batch apex", "queueable", "future method", "api", "sfdc", "heap", "limits"
     ]
-    if not any(k in query.lower() for k in salesforce_keywords):
         return "Please ask a Salesforce-related question."
     history_summary = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in conversation_history[-4:]])
-    system_prompt = (
-        "You are a certified Salesforce developer and architect. Answer with correct, production-safe guidance. "
-        "When relevant, mention governor limits (e.g., 100 SOQL queries per transaction, 150 DML statements). "
-        "Use bullets or code snippets. Prefer bulk-safe patterns and official docs. "
-        "Do NOT repeat the user's question in your answer."
-    )
-    prompt = f"{system_prompt}\n\nConversation History:\n{history_summary}\n\nUser: {query.strip()}\nAssistant:"
     try:
-        if nlp_pipeline:
-            gen = nlp_pipeline(
-                prompt,
-                max_new_tokens=220,
-                do_sample=False
-            )[0]["generated_text"]
-            out = _clean_llm_reply(gen)
-        else:
-            out = "⚠️ NLP model not available. Please check Hugging Face pipeline."
-        # Ensure non-trivial response
-        if len(out.split()) < 12:
-            out += "\n\nRefer to the official docs: https://developer.salesforce.com/docs"
-        conversation_history.append((query, out))
         conversation_history = conversation_history[-6:]
-        log_to_console({"Question": query, "Answer": out}, "Chatbot Query")
-        return out
     except Exception as e:
         return f"⚠️ Error generating response: {str(e)}"
 # ---------- Gradio UI ----------
-conversation_history = []
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 Advanced Salesforce AI Code Review & Chatbot")
@@ -362,4 +236,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         clear_button.click(fn=clear_chat, inputs=None, outputs=[chatbot_output, query_input])
 if __name__ == "__main__":
-    demo.launch()

 import os
 import random
 import gradio as gr
+from datetime import datetime
 from transformers import pipeline
 from simple_salesforce import Salesforce, SalesforceLogin
 from dotenv import load_dotenv
+import xml.etree.ElementTree as ET
 # ---------- Load Environment Variables ----------
 load_dotenv()
 SF_PASSWORD = os.getenv("SF_PASSWORD")
 SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
+# ---------- Label Mapping ----------
 label_to_issue_type = {
     "LABEL_0": "Performance",
     "LABEL_1": "Error",
 }
 suggestions = {
+    "Performance": "Consider optimizing loops and database access. Use collections to reduce SOQL queries.",
+    "Error": "Add proper error handling and null checks. Use try-catch blocks effectively.",
+    "Security": "Avoid dynamic SOQL. Use binding variables to prevent SOQL injection.",
+    "Best Practice": "Refactor for readability and use bulk-safe patterns, such as processing records in batches."
 }
 severities = {
     "Best Practice": "Low"
 }
+# ---------- Load QnA Model (no fallback) ----------
+qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-large")
 # ---------- Logging ----------
 def log_to_console(data, log_type):
     sf = None
     print(f"❌ Failed to connect to Salesforce: {e}")
+# ---------- Code Analyzer ----------
 def analyze_code(code):
+    if not code.strip():
         return "No code provided.", "", ""
+    label = random.choice(list(label_to_issue_type.keys()))
+    issue_type = label_to_issue_type[label]
+    suggestion = suggestions[issue_type]
+    severity = severities[issue_type]
     review_data = {
         "Name": f"Review_{issue_type}",
         "CodeSnippet__c": code,
         "IssueType__c": issue_type,
+        "Suggestion__c": suggestion,
         "Severity__c": severity
     }
     else:
         log_to_console("Salesforce not connected.", "Salesforce Error")
+    return issue_type, suggestion, severity
+# ---------- Metadata Validator ----------
 def validate_metadata(metadata, admin_id=None):
+    if not metadata.strip():
         return "No metadata provided.", "", ""
+    mtype = "Field"
+    issue = "Unknown"
+    recommendation = "No recommendation found."
     try:
         root = ET.fromstring(metadata)
+        description_found = any(elem.tag.endswith('description') for elem in root)
+        if not description_found:
+            issue = "Missing description"
+            recommendation = "Add a meaningful <description> to improve maintainability and clarity."
+        else:
+            issue = "Unused field detected"
+            recommendation = "Remove it to improve performance or document its purpose."
     except Exception as e:
         issue = "Invalid XML"
         recommendation = f"Could not parse metadata XML. Error: {str(e)}"
     return mtype, issue, recommendation
+# ---------- Salesforce Chatbot (Improved Prompt) ----------
+conversation_history = []
 def salesforce_chatbot(query, history=[]):
     global conversation_history
+    if not query.strip():
         return "Please provide a valid Salesforce-related question."
     salesforce_keywords = [
         "apex", "soql", "trigger", "lwc", "aura", "visualforce", "salesforce", "governor limits",
         "dml", "metadata", "batch apex", "queueable", "future method", "api", "sfdc", "heap", "limits"
     ]
+    if not any(keyword.lower() in query.lower() for keyword in salesforce_keywords):
         return "Please ask a Salesforce-related question."
     history_summary = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in conversation_history[-4:]])
+    prompt = f"""
+You are a certified Salesforce developer and architect. Your role is to answer with 100% accurate and detailed technical explanations, especially about limits, code, and platform best practices.
+Your answers MUST:
+Always be at least two lines long.
+Be correct, clear, and production-safe.
+Include official Salesforce governor limits when applicable.
+Use bullet points or code snippets when needed.
+Recommend Trailhead or official docs if the answer isn't definitive.
+Follow real-world practices (bulkification, error handling, etc).
+Conversation History:
+{history_summary}
+User: {query.strip()}
+Assistant:
+"""
     try:
+        result = qa_pipeline(prompt, max_new_tokens=1024, do_sample=False, temperature=0.1, top_k=50)
+        output = result[0]["generated_text"].strip()
+        if output.startswith("Assistant:"):
+            output = output.replace("Assistant:", "").strip()
+        if len(output.split()) < 15:
+            output += "\n\nRefer to: https://developer.salesforce.com/docs for more."
+        conversation_history.append((query, output))
         conversation_history = conversation_history[-6:]
+        log_to_console({"Question": query, "Answer": output}, "Chatbot Query")
+        return output
     except Exception as e:
         return f"⚠️ Error generating response: {str(e)}"
 # ---------- Gradio UI ----------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 Advanced Salesforce AI Code Review & Chatbot")
         clear_button.click(fn=clear_chat, inputs=None, outputs=[chatbot_output, query_input])
 if __name__ == "__main__":
+    demo.launch()