AIcodemetadata13

Sleeping

App Files Files Community

Bhanumani12 committed on Aug 20, 2025

Commit

61ea1a4

verified ·

1 Parent(s): e9e0e45

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -29

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 import re
 import json
@@ -16,7 +17,7 @@ SF_USERNAME = os.getenv("SF_USERNAME")
 SF_PASSWORD = os.getenv("SF_PASSWORD")
 SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
-# ---------- Label Mapping (kept; now used as fallback) ----------
 label_to_issue_type = {
     "LABEL_0": "Performance",
     "LABEL_1": "Error",
@@ -27,7 +28,7 @@ label_to_issue_type = {
 suggestions = {
     "Performance": "Consider optimizing loops and database access. Use collections to reduce SOQL/DML calls, avoid SOQL/DML inside loops, and add selective WHERE clauses.",
     "Error": "Add proper error handling and null checks. Wrap DML in try/catch and use Database methods for partial success.",
-    "Security": "Avoid dynamic SOQL. Use bind variables, withSharing, and field-level security checks where applicable.",
     "Best Practice": "Refactor for readability and bulk-safety (Batchable/Queueable where needed). Limit fields and records in queries."
 }
@@ -39,14 +40,14 @@ severities = {
 }
 # ---------- Hugging Face Models (Hugging Face only, per BRD/SDD) ----------
-# Lightweight BLOOMZ for natural language support
 try:
     nlp_pipeline = pipeline("text-generation", model="bigscience/bloomz-560m")
 except Exception as e:
     nlp_pipeline = None
     print(f"⚠️ Could not load BLOOMZ model: {e}")
-# Optional: simple classifier (kept minimal; not strictly required)
 try:
     clf_pipeline = pipeline("text-classification", model="microsoft/codebert-base")
 except Exception as e:
@@ -71,21 +72,21 @@ except Exception as e:
     sf = None
     print(f"❌ Failed to connect to Salesforce: {e}")
-# ---------- Heuristic Rules for Apex/LWC ----------
 SOQL_PATTERN = re.compile(r"\b(?:Database\.query|SELECT\s+[\s\S]+?FROM\b)", re.IGNORECASE)
 DML_PATTERN = re.compile(r"\b(insert|update|upsert|delete|undelete|merge)\b", re.IGNORECASE)
 LOOP_PATTERN = re.compile(r"\b(for\s*\(|while\s*\()", re.IGNORECASE)
 DEBUG_PATTERN = re.compile(r"\bSystem\.debug\s*\(", re.IGNORECASE)
 DYNAMIC_SOQL_PATTERN = re.compile(r"['\"].*SELECT.*FROM.*['\"]\s*\+\s*", re.IGNORECASE)
-UNBOUNDED_QUERY_PATTERN = re.compile(r"SELECT\s+\*\s+FROM", re.IGNORECASE)  # LWC/JS cases
-NULL_GUARD_PATTERN = re.compile(r"\b(\w+)\.(\w+)\(", re.IGNORECASE)  # very rough
 def analyze_code_rules(code: str):
     issues = []
     # SOQL/DML inside loops
     for loop in LOOP_PATTERN.finditer(code):
-        loop_block = code[loop.start(): loop.start()+400]  # shallow lookahead
         if SOQL_PATTERN.search(loop_block):
             issues.append(("Performance", "SOQL query inside a loop detected. Move query outside the loop or use collections."))
         if DML_PATTERN.search(loop_block):
@@ -105,15 +106,14 @@ def analyze_code_rules(code: str):
         issues.append(("Performance", "Unbounded SELECT * detected. Query only required fields."))
     # (Very) rough null guard hint
-    # Suggest using null-checks where chained dereferences are visible
     dot_calls = len(NULL_GUARD_PATTERN.findall(code))
     if dot_calls > 15:
         issues.append(("Error", "Multiple chained calls detected. Ensure null checks and guard clauses to avoid NullPointerExceptions."))
-    # If classifier is available, add its hint as a final tag
     if clf_pipeline:
         try:
-            pred = clf_pipeline(code[:1000])[0]  # keep it small
             mapped = label_to_issue_type.get(pred.get("label"), "Best Practice")
             issues.append((mapped, f"Model hint: {mapped} issue likely. Confidence ~{pred.get('score', 0):.2f}"))
         except Exception:
@@ -135,11 +135,10 @@ def pick_primary(issues):
         return ("Best Practice", suggestions["Best Practice"], severities["Best Practice"])
     issues_sorted = sorted(issues, key=lambda x: prio.get(x[0], 0), reverse=True)
     top_type = issues_sorted[0][0]
-    # Merge messages into one suggestion
     merged = "; ".join(msg for _, msg in issues_sorted[:3])
     return (top_type, merged or suggestions[top_type], severities[top_type])
-# ---------- Code Analyzer ----------
 def analyze_code(code):
     if not code or not code.strip():
         return "No code provided.", "", ""
@@ -171,7 +170,7 @@ def analyze_code(code):
     return issue_type, suggestion_text, severity
-# ---------- Metadata Validator ----------
 def validate_metadata(metadata, admin_id=None):
     if not metadata or not metadata.strip():
         return "No metadata provided.", "", ""
@@ -184,7 +183,7 @@ def validate_metadata(metadata, admin_id=None):
         root = ET.fromstring(metadata)
         # 1) Description present?
         has_description = any(elem.tag.lower().endswith('description') and (elem.text or '').strip() for elem in root.iter())
-        # 2) Duplicate <fullName> or field names?
         names = []
         duplicates = set()
         for elem in root.iter():
@@ -199,10 +198,7 @@ def validate_metadata(metadata, admin_id=None):
         missing_help = []
         for f in root.iter():
             if f.tag.lower().endswith('fields'):
-                # look for nested field fullName
-                fname = None
-                fdesc = None
-                fhelp = None
                 for ch in f:
                     t = ch.tag.lower()
                     if t.endswith('fullname') and ch.text:
@@ -261,8 +257,18 @@ def validate_metadata(metadata, admin_id=None):
     return mtype, issue, recommendation
-# ---------- Salesforce Chatbot (BLOOMZ) ----------
-conversation_history = []
 def salesforce_chatbot(query, history=[]):
     global conversation_history
@@ -273,8 +279,7 @@ def salesforce_chatbot(query, history=[]):
         "apex", "soql", "trigger", "lwc", "aura", "visualforce", "salesforce", "governor limits",
         "dml", "metadata", "batch apex", "queueable", "future method", "api", "sfdc", "heap", "limits"
     ]
-    if not any(keyword.lower() in query.lower() for keyword in salesforce_keywords):
         return "Please ask a Salesforce-related question."
     history_summary = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in conversation_history[-4:]])
@@ -282,28 +287,38 @@ def salesforce_chatbot(query, history=[]):
     system_prompt = (
         "You are a certified Salesforce developer and architect. Answer with correct, production-safe guidance. "
         "When relevant, mention governor limits (e.g., 100 SOQL queries per transaction, 150 DML statements). "
-        "Use bullets or code snippets. Prefer bulk-safe patterns and official docs."
     )
     prompt = f"{system_prompt}\n\nConversation History:\n{history_summary}\n\nUser: {query.strip()}\nAssistant:"
     try:
         if nlp_pipeline:
-            out = nlp_pipeline(prompt, max_new_tokens=220, do_sample=False)[0]["generated_text"].strip()
         else:
-            out = "Governor limits matter (e.g., 100 SOQL queries/tx, 150 DML). Use bulk patterns, selective queries, and proper error handling."
-        # Keep answer reasonable length
-        if len(out.split()) < 15:
-            out += "\n\nTip: Use Database.insert with allOrNone=false for partial success and check Limits class."
         conversation_history.append((query, out))
         conversation_history = conversation_history[-6:]
         log_to_console({"Question": query, "Answer": out}, "Chatbot Query")
         return out
     except Exception as e:
         return f"⚠️ Error generating response: {str(e)}"
 # ---------- Gradio UI ----------
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 Advanced Salesforce AI Code Review & Chatbot")

+# app.py
 import os
 import re
 import json
 SF_PASSWORD = os.getenv("SF_PASSWORD")
 SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
+# ---------- Label Mapping (kept for model hint mapping) ----------
 label_to_issue_type = {
     "LABEL_0": "Performance",
     "LABEL_1": "Error",
 suggestions = {
     "Performance": "Consider optimizing loops and database access. Use collections to reduce SOQL/DML calls, avoid SOQL/DML inside loops, and add selective WHERE clauses.",
     "Error": "Add proper error handling and null checks. Wrap DML in try/catch and use Database methods for partial success.",
+    "Security": "Avoid dynamic SOQL. Use bind variables, with sharing, and field-level security checks where applicable.",
     "Best Practice": "Refactor for readability and bulk-safety (Batchable/Queueable where needed). Limit fields and records in queries."
 }
 }
 # ---------- Hugging Face Models (Hugging Face only, per BRD/SDD) ----------
+# NLP for chatbot
 try:
     nlp_pipeline = pipeline("text-generation", model="bigscience/bloomz-560m")
 except Exception as e:
     nlp_pipeline = None
     print(f"⚠️ Could not load BLOOMZ model: {e}")
+# Optional classifier for a small hint in code analysis (not required)
 try:
     clf_pipeline = pipeline("text-classification", model="microsoft/codebert-base")
 except Exception as e:
     sf = None
     print(f"❌ Failed to connect to Salesforce: {e}")
+# ---------- Heuristic Rules for Apex/LWC (governor, security, best-practice) ----------
 SOQL_PATTERN = re.compile(r"\b(?:Database\.query|SELECT\s+[\s\S]+?FROM\b)", re.IGNORECASE)
 DML_PATTERN = re.compile(r"\b(insert|update|upsert|delete|undelete|merge)\b", re.IGNORECASE)
 LOOP_PATTERN = re.compile(r"\b(for\s*\(|while\s*\()", re.IGNORECASE)
 DEBUG_PATTERN = re.compile(r"\bSystem\.debug\s*\(", re.IGNORECASE)
 DYNAMIC_SOQL_PATTERN = re.compile(r"['\"].*SELECT.*FROM.*['\"]\s*\+\s*", re.IGNORECASE)
+UNBOUNDED_QUERY_PATTERN = re.compile(r"SELECT\s+\*\s+FROM", re.IGNORECASE)  # JS/LWC anti-pattern
+NULL_GUARD_PATTERN = re.compile(r"\b(\w+)\.(\w+)\(", re.IGNORECASE)  # rough chained-call detector
 def analyze_code_rules(code: str):
     issues = []
     # SOQL/DML inside loops
     for loop in LOOP_PATTERN.finditer(code):
+        loop_block = code[loop.start(): loop.start() + 400]  # shallow lookahead
         if SOQL_PATTERN.search(loop_block):
             issues.append(("Performance", "SOQL query inside a loop detected. Move query outside the loop or use collections."))
         if DML_PATTERN.search(loop_block):
         issues.append(("Performance", "Unbounded SELECT * detected. Query only required fields."))
     # (Very) rough null guard hint
     dot_calls = len(NULL_GUARD_PATTERN.findall(code))
     if dot_calls > 15:
         issues.append(("Error", "Multiple chained calls detected. Ensure null checks and guard clauses to avoid NullPointerExceptions."))
+    # Optional classifier hint
     if clf_pipeline:
         try:
+            pred = clf_pipeline(code[:1000])[0]  # short context
             mapped = label_to_issue_type.get(pred.get("label"), "Best Practice")
             issues.append((mapped, f"Model hint: {mapped} issue likely. Confidence ~{pred.get('score', 0):.2f}"))
         except Exception:
         return ("Best Practice", suggestions["Best Practice"], severities["Best Practice"])
     issues_sorted = sorted(issues, key=lambda x: prio.get(x[0], 0), reverse=True)
     top_type = issues_sorted[0][0]
     merged = "; ".join(msg for _, msg in issues_sorted[:3])
     return (top_type, merged or suggestions[top_type], severities[top_type])
+# ---------- Code Analyzer (UI callback) ----------
 def analyze_code(code):
     if not code or not code.strip():
         return "No code provided.", "", ""
     return issue_type, suggestion_text, severity
+# ---------- Metadata Validator (UI callback) ----------
 def validate_metadata(metadata, admin_id=None):
     if not metadata or not metadata.strip():
         return "No metadata provided.", "", ""
         root = ET.fromstring(metadata)
         # 1) Description present?
         has_description = any(elem.tag.lower().endswith('description') and (elem.text or '').strip() for elem in root.iter())
+        # 2) Duplicate <fullName> or generic <name> values?
         names = []
         duplicates = set()
         for elem in root.iter():
         missing_help = []
         for f in root.iter():
             if f.tag.lower().endswith('fields'):
+                fname, fdesc, fhelp = None, None, None
                 for ch in f:
                     t = ch.tag.lower()
                     if t.endswith('fullname') and ch.text:
     return mtype, issue, recommendation
+# ---------- Chatbot helpers (no hardcoded answers; model-only) ----------
+def _clean_llm_reply(generated: str) -> str:
+    """Strip prompt echoing and keep only the assistant's part.
+
+    Args:
+        generated: Raw text produced by the text-generation pipeline. A
+            falsy value (``None`` or ``""``) is treated as empty text.
+
+    Returns:
+        The text following the last ``Assistant:`` marker (or the whole
+        text when no marker is present), with every line whose stripped
+        form starts with ``User:`` dropped, leading/trailing whitespace
+        removed, and runs of three or more newlines collapsed to two.
+    """
+    text = generated or ""
+    # Keep only content after the last 'Assistant:'
+    if "Assistant:" in text:
+        text = text.split("Assistant:")[-1]
+    # Remove any lines that start with 'User:' to avoid echo
+    lines = [line for line in text.splitlines() if not line.strip().startswith("User:")]
+    cleaned = "\n".join(lines).strip()
+    # Collapse excess blank lines so the reply has at most one empty line in a row
+    cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
+    return cleaned
 def salesforce_chatbot(query, history=[]):
     global conversation_history
         "apex", "soql", "trigger", "lwc", "aura", "visualforce", "salesforce", "governor limits",
         "dml", "metadata", "batch apex", "queueable", "future method", "api", "sfdc", "heap", "limits"
     ]
+    if not any(k in query.lower() for k in salesforce_keywords):
         return "Please ask a Salesforce-related question."
     history_summary = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in conversation_history[-4:]])
     system_prompt = (
         "You are a certified Salesforce developer and architect. Answer with correct, production-safe guidance. "
         "When relevant, mention governor limits (e.g., 100 SOQL queries per transaction, 150 DML statements). "
+        "Use bullets or code snippets. Prefer bulk-safe patterns and official docs. "
+        "Do NOT repeat the user's question in your answer."
     )
     prompt = f"{system_prompt}\n\nConversation History:\n{history_summary}\n\nUser: {query.strip()}\nAssistant:"
     try:
         if nlp_pipeline:
+            gen = nlp_pipeline(
+                prompt,
+                max_new_tokens=220,
+                do_sample=False
+            )[0]["generated_text"]
+            out = _clean_llm_reply(gen)
         else:
+            out = "⚠️ NLP model not available. Please check Hugging Face pipeline."
+        # Ensure non-trivial response
+        if len(out.split()) < 12:
+            out += "\n\nRefer to the official docs: https://developer.salesforce.com/docs"
         conversation_history.append((query, out))
         conversation_history = conversation_history[-6:]
         log_to_console({"Question": query, "Answer": out}, "Chatbot Query")
         return out
     except Exception as e:
         return f"⚠️ Error generating response: {str(e)}"
 # ---------- Gradio UI ----------
+conversation_history = []
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🤖 Advanced Salesforce AI Code Review & Chatbot")