Bhanumani12 committed on
Commit
6fd3baf
·
verified ·
1 Parent(s): 61ea1a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -186
app.py CHANGED
@@ -1,15 +1,11 @@
1
- # app.py
2
  import os
3
- import re
4
- import json
5
  import random
6
- import xml.etree.ElementTree as ET
7
- from datetime import datetime
8
-
9
  import gradio as gr
 
10
  from transformers import pipeline
11
  from simple_salesforce import Salesforce, SalesforceLogin
12
  from dotenv import load_dotenv
 
13
 
14
  # ---------- Load Environment Variables ----------
15
  load_dotenv()
@@ -17,7 +13,7 @@ SF_USERNAME = os.getenv("SF_USERNAME")
17
  SF_PASSWORD = os.getenv("SF_PASSWORD")
18
  SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
19
 
20
- # ---------- Label Mapping (kept for model hint mapping) ----------
21
  label_to_issue_type = {
22
  "LABEL_0": "Performance",
23
  "LABEL_1": "Error",
@@ -26,10 +22,10 @@ label_to_issue_type = {
26
  }
27
 
28
  suggestions = {
29
- "Performance": "Consider optimizing loops and database access. Use collections to reduce SOQL/DML calls, avoid SOQL/DML inside loops, and add selective WHERE clauses.",
30
- "Error": "Add proper error handling and null checks. Wrap DML in try/catch and use Database methods for partial success.",
31
- "Security": "Avoid dynamic SOQL. Use bind variables, with sharing, and field-level security checks where applicable.",
32
- "Best Practice": "Refactor for readability and bulk-safety (Batchable/Queueable where needed). Limit fields and records in queries."
33
  }
34
 
35
  severities = {
@@ -39,20 +35,8 @@ severities = {
39
  "Best Practice": "Low"
40
  }
41
 
42
- # ---------- Hugging Face Models (Hugging Face only, per BRD/SDD) ----------
43
- # NLP for chatbot
44
- try:
45
- nlp_pipeline = pipeline("text-generation", model="bigscience/bloomz-560m")
46
- except Exception as e:
47
- nlp_pipeline = None
48
- print(f"⚠️ Could not load BLOOMZ model: {e}")
49
-
50
- # Optional classifier for a small hint in code analysis (not required)
51
- try:
52
- clf_pipeline = pipeline("text-classification", model="microsoft/codebert-base")
53
- except Exception as e:
54
- clf_pipeline = None
55
- print(f"⚠️ Could not load CodeBERT classifier: {e}")
56
 
57
  # ---------- Logging ----------
58
  def log_to_console(data, log_type):
@@ -72,85 +56,21 @@ except Exception as e:
72
  sf = None
73
  print(f"❌ Failed to connect to Salesforce: {e}")
74
 
75
- # ---------- Heuristic Rules for Apex/LWC (governor, security, best-practice) ----------
76
- SOQL_PATTERN = re.compile(r"\b(?:Database\.query|SELECT\s+[\s\S]+?FROM\b)", re.IGNORECASE)
77
- DML_PATTERN = re.compile(r"\b(insert|update|upsert|delete|undelete|merge)\b", re.IGNORECASE)
78
- LOOP_PATTERN = re.compile(r"\b(for\s*\(|while\s*\()", re.IGNORECASE)
79
- DEBUG_PATTERN = re.compile(r"\bSystem\.debug\s*\(", re.IGNORECASE)
80
- DYNAMIC_SOQL_PATTERN = re.compile(r"['\"].*SELECT.*FROM.*['\"]\s*\+\s*", re.IGNORECASE)
81
- UNBOUNDED_QUERY_PATTERN = re.compile(r"SELECT\s+\*\s+FROM", re.IGNORECASE) # JS/LWC anti-pattern
82
- NULL_GUARD_PATTERN = re.compile(r"\b(\w+)\.(\w+)\(", re.IGNORECASE) # rough chained-call detector
83
-
84
- def analyze_code_rules(code: str):
85
- issues = []
86
-
87
- # SOQL/DML inside loops
88
- for loop in LOOP_PATTERN.finditer(code):
89
- loop_block = code[loop.start(): loop.start() + 400] # shallow lookahead
90
- if SOQL_PATTERN.search(loop_block):
91
- issues.append(("Performance", "SOQL query inside a loop detected. Move query outside the loop or use collections."))
92
- if DML_PATTERN.search(loop_block):
93
- issues.append(("Performance", "DML operation inside a loop detected. Bulkify by collecting records and performing DML once."))
94
-
95
- # Dynamic SOQL
96
- if DYNAMIC_SOQL_PATTERN.search(code):
97
- issues.append(("Security", "Dynamic SOQL concatenation detected. Use bind variables to prevent injection."))
98
-
99
- # Excessive debug statements
100
- dbg_count = len(DEBUG_PATTERN.findall(code))
101
- if dbg_count > 2:
102
- issues.append(("Best Practice", f"Found {dbg_count} System.debug statements. Remove or gate them for production."))
103
-
104
- # Unbounded queries (JS/LWC anti-patterns)
105
- if UNBOUNDED_QUERY_PATTERN.search(code):
106
- issues.append(("Performance", "Unbounded SELECT * detected. Query only required fields."))
107
-
108
- # (Very) rough null guard hint
109
- dot_calls = len(NULL_GUARD_PATTERN.findall(code))
110
- if dot_calls > 15:
111
- issues.append(("Error", "Multiple chained calls detected. Ensure null checks and guard clauses to avoid NullPointerExceptions."))
112
-
113
- # Optional classifier hint
114
- if clf_pipeline:
115
- try:
116
- pred = clf_pipeline(code[:1000])[0] # short context
117
- mapped = label_to_issue_type.get(pred.get("label"), "Best Practice")
118
- issues.append((mapped, f"Model hint: {mapped} issue likely. Confidence ~{pred.get('score', 0):.2f}"))
119
- except Exception:
120
- pass
121
-
122
- # Deduplicate by message
123
- seen = set()
124
- deduped = []
125
- for t, msg in issues:
126
- if msg not in seen:
127
- seen.add(msg)
128
- deduped.append((t, msg))
129
- return deduped
130
-
131
- def pick_primary(issues):
132
- # Priority: Security/Error > Performance > Best Practice
133
- prio = {"Security": 3, "Error": 3, "Performance": 2, "Best Practice": 1}
134
- if not issues:
135
- return ("Best Practice", suggestions["Best Practice"], severities["Best Practice"])
136
- issues_sorted = sorted(issues, key=lambda x: prio.get(x[0], 0), reverse=True)
137
- top_type = issues_sorted[0][0]
138
- merged = "; ".join(msg for _, msg in issues_sorted[:3])
139
- return (top_type, merged or suggestions[top_type], severities[top_type])
140
-
141
- # ---------- Code Analyzer (UI callback) ----------
142
  def analyze_code(code):
143
- if not code or not code.strip():
144
  return "No code provided.", "", ""
145
 
146
- issues = analyze_code_rules(code)
147
- issue_type, suggestion_text, severity = pick_primary(issues)
 
 
148
 
149
  review_data = {
150
  "Name": f"Review_{issue_type}",
151
  "CodeSnippet__c": code,
152
  "IssueType__c": issue_type,
153
- "Suggestion__c": suggestion_text,
154
  "Severity__c": severity
155
  }
156
 
@@ -168,65 +88,28 @@ def analyze_code(code):
168
  else:
169
  log_to_console("Salesforce not connected.", "Salesforce Error")
170
 
171
- return issue_type, suggestion_text, severity
172
 
173
- # ---------- Metadata Validator (UI callback) ----------
174
  def validate_metadata(metadata, admin_id=None):
175
- if not metadata or not metadata.strip():
176
  return "No metadata provided.", "", ""
177
 
178
- mtype = "Object"
179
- issue = "No issues detected."
180
- recommendation = "Looks good."
181
 
182
  try:
183
  root = ET.fromstring(metadata)
184
- # 1) Description present?
185
- has_description = any(elem.tag.lower().endswith('description') and (elem.text or '').strip() for elem in root.iter())
186
- # 2) Duplicate <fullName> or generic <name> values?
187
- names = []
188
- duplicates = set()
189
- for elem in root.iter():
190
- tag = elem.tag.lower()
191
- if tag.endswith('fullname') or tag.endswith('name'):
192
- if elem.text:
193
- val = elem.text.strip()
194
- if val in names:
195
- duplicates.add(val)
196
- names.append(val)
197
- # 3) Fields missing helpText/description
198
- missing_help = []
199
- for f in root.iter():
200
- if f.tag.lower().endswith('fields'):
201
- fname, fdesc, fhelp = None, None, None
202
- for ch in f:
203
- t = ch.tag.lower()
204
- if t.endswith('fullname') and ch.text:
205
- fname = ch.text.strip()
206
- if t.endswith('description') and ch.text:
207
- fdesc = ch.text.strip()
208
- if t.endswith('helptext') and ch.text:
209
- fhelp = ch.text.strip()
210
- if fname and not (fdesc or fhelp):
211
- missing_help.append(fname)
212
-
213
- problems = []
214
- if not has_description:
215
- problems.append("Missing <description> on the object/metadata.")
216
- if duplicates:
217
- problems.append(f"Duplicate names detected: {', '.join(sorted(list(duplicates)))}.")
218
- if missing_help:
219
- problems.append(f"Fields missing description/helpText: {', '.join(missing_help[:10])}" + ("..." if len(missing_help) > 10 else ""))
220
-
221
- if problems:
222
- issue = " | ".join(problems)
223
- recommendation = "Add descriptions/helpText; remove duplicates; follow naming standards."
224
- else:
225
- issue = "No high-severity issues detected."
226
- recommendation = "Consider adding descriptions and reviewing picklists for inactive values."
227
 
 
 
 
 
 
 
228
  except Exception as e:
229
- mtype = "Unknown"
230
  issue = "Invalid XML"
231
  recommendation = f"Could not parse metadata XML. Error: {str(e)}"
232
 
@@ -257,68 +140,59 @@ def validate_metadata(metadata, admin_id=None):
257
 
258
  return mtype, issue, recommendation
259
 
260
- # ---------- Chatbot helpers (no hardcoded answers; model-only) ----------
261
- def _clean_llm_reply(generated: str) -> str:
262
- """Strip prompt echoing and keep only the assistant's part."""
263
- text = generated or ""
264
- # Keep only content after the last 'Assistant:'
265
- if "Assistant:" in text:
266
- text = text.split("Assistant:")[-1]
267
- # Remove any lines that start with 'User:' to avoid echo
268
- lines = [line for line in text.splitlines() if not line.strip().startswith("User:")]
269
- cleaned = "\n".join(lines).strip()
270
- cleaned = re.sub(r"\n{3,}", "\n\n", cleaned)
271
- return cleaned
272
 
273
  def salesforce_chatbot(query, history=[]):
274
  global conversation_history
275
- if not query or not query.strip():
276
  return "Please provide a valid Salesforce-related question."
277
 
278
  salesforce_keywords = [
279
  "apex", "soql", "trigger", "lwc", "aura", "visualforce", "salesforce", "governor limits",
280
  "dml", "metadata", "batch apex", "queueable", "future method", "api", "sfdc", "heap", "limits"
281
  ]
282
- if not any(k in query.lower() for k in salesforce_keywords):
 
283
  return "Please ask a Salesforce-related question."
284
 
285
  history_summary = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in conversation_history[-4:]])
286
 
287
- system_prompt = (
288
- "You are a certified Salesforce developer and architect. Answer with correct, production-safe guidance. "
289
- "When relevant, mention governor limits (e.g., 100 SOQL queries per transaction, 150 DML statements). "
290
- "Use bullets or code snippets. Prefer bulk-safe patterns and official docs. "
291
- "Do NOT repeat the user's question in your answer."
292
- )
 
 
 
 
293
 
294
- prompt = f"{system_prompt}\n\nConversation History:\n{history_summary}\n\nUser: {query.strip()}\nAssistant:"
 
 
 
 
 
295
 
296
  try:
297
- if nlp_pipeline:
298
- gen = nlp_pipeline(
299
- prompt,
300
- max_new_tokens=220,
301
- do_sample=False
302
- )[0]["generated_text"]
303
- out = _clean_llm_reply(gen)
304
- else:
305
- out = "⚠️ NLP model not available. Please check Hugging Face pipeline."
306
 
307
- # Ensure non-trivial response
308
- if len(out.split()) < 12:
309
- out += "\n\nRefer to the official docs: https://developer.salesforce.com/docs"
310
 
311
- conversation_history.append((query, out))
312
  conversation_history = conversation_history[-6:]
313
- log_to_console({"Question": query, "Answer": out}, "Chatbot Query")
314
- return out
315
-
316
  except Exception as e:
317
  return f"⚠️ Error generating response: {str(e)}"
318
 
319
  # ---------- Gradio UI ----------
320
- conversation_history = []
321
-
322
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
323
  gr.Markdown("# 🤖 Advanced Salesforce AI Code Review & Chatbot")
324
 
@@ -362,4 +236,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
362
  clear_button.click(fn=clear_chat, inputs=None, outputs=[chatbot_output, query_input])
363
 
364
  if __name__ == "__main__":
365
- demo.launch()
 
 
1
  import os
 
 
2
  import random
 
 
 
3
  import gradio as gr
4
+ from datetime import datetime
5
  from transformers import pipeline
6
  from simple_salesforce import Salesforce, SalesforceLogin
7
  from dotenv import load_dotenv
8
+ import xml.etree.ElementTree as ET
9
 
10
  # ---------- Load Environment Variables ----------
11
  load_dotenv()
 
13
  SF_PASSWORD = os.getenv("SF_PASSWORD")
14
  SF_SECURITY_TOKEN = os.getenv("SF_SECURITY_TOKEN")
15
 
16
+ # ---------- Label Mapping ----------
17
  label_to_issue_type = {
18
  "LABEL_0": "Performance",
19
  "LABEL_1": "Error",
 
22
  }
23
 
24
  suggestions = {
25
+ "Performance": "Consider optimizing loops and database access. Use collections to reduce SOQL queries.",
26
+ "Error": "Add proper error handling and null checks. Use try-catch blocks effectively.",
27
+ "Security": "Avoid dynamic SOQL. Use binding variables to prevent SOQL injection.",
28
+ "Best Practice": "Refactor for readability and use bulk-safe patterns, such as processing records in batches."
29
  }
30
 
31
  severities = {
 
35
  "Best Practice": "Low"
36
  }
37
 
38
+ # ---------- Load QnA Model (no fallback) ----------
39
+ qa_pipeline = pipeline("text2text-generation", model="google/flan-t5-large")
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  # ---------- Logging ----------
42
  def log_to_console(data, log_type):
 
56
  sf = None
57
  print(f"❌ Failed to connect to Salesforce: {e}")
58
 
59
+ # ---------- Code Analyzer ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  def analyze_code(code):
61
+ if not code.strip():
62
  return "No code provided.", "", ""
63
 
64
+ label = random.choice(list(label_to_issue_type.keys()))
65
+ issue_type = label_to_issue_type[label]
66
+ suggestion = suggestions[issue_type]
67
+ severity = severities[issue_type]
68
 
69
  review_data = {
70
  "Name": f"Review_{issue_type}",
71
  "CodeSnippet__c": code,
72
  "IssueType__c": issue_type,
73
+ "Suggestion__c": suggestion,
74
  "Severity__c": severity
75
  }
76
 
 
88
  else:
89
  log_to_console("Salesforce not connected.", "Salesforce Error")
90
 
91
+ return issue_type, suggestion, severity
92
 
93
+ # ---------- Metadata Validator ----------
94
  def validate_metadata(metadata, admin_id=None):
95
+ if not metadata.strip():
96
  return "No metadata provided.", "", ""
97
 
98
+ mtype = "Field"
99
+ issue = "Unknown"
100
+ recommendation = "No recommendation found."
101
 
102
  try:
103
  root = ET.fromstring(metadata)
104
+ description_found = any(elem.tag.endswith('description') for elem in root)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
+ if not description_found:
107
+ issue = "Missing description"
108
+ recommendation = "Add a meaningful <description> to improve maintainability and clarity."
109
+ else:
110
+ issue = "Unused field detected"
111
+ recommendation = "Remove it to improve performance or document its purpose."
112
  except Exception as e:
 
113
  issue = "Invalid XML"
114
  recommendation = f"Could not parse metadata XML. Error: {str(e)}"
115
 
 
140
 
141
  return mtype, issue, recommendation
142
 
143
+ # ---------- Salesforce Chatbot (Improved Prompt) ----------
144
+ conversation_history = []
 
 
 
 
 
 
 
 
 
 
145
 
146
  def salesforce_chatbot(query, history=[]):
147
  global conversation_history
148
+ if not query.strip():
149
  return "Please provide a valid Salesforce-related question."
150
 
151
  salesforce_keywords = [
152
  "apex", "soql", "trigger", "lwc", "aura", "visualforce", "salesforce", "governor limits",
153
  "dml", "metadata", "batch apex", "queueable", "future method", "api", "sfdc", "heap", "limits"
154
  ]
155
+
156
+ if not any(keyword.lower() in query.lower() for keyword in salesforce_keywords):
157
  return "Please ask a Salesforce-related question."
158
 
159
  history_summary = "\n".join([f"User: {q}\nAssistant: {a}" for q, a in conversation_history[-4:]])
160
 
161
+ prompt = f"""
162
+ You are a certified Salesforce developer and architect. Your role is to answer with 100% accurate and detailed technical explanations, especially about limits, code, and platform best practices.
163
+
164
+ Your answers MUST:
165
+ Always be at least two lines long.
166
+ Be correct, clear, and production-safe.
167
+ Include official Salesforce governor limits when applicable.
168
+ Use bullet points or code snippets when needed.
169
+ Recommend Trailhead or official docs if the answer isn't definitive.
170
+ Follow real-world practices (bulkification, error handling, etc).
171
 
172
+ Conversation History:
173
+ {history_summary}
174
+
175
+ User: {query.strip()}
176
+ Assistant:
177
+ """
178
 
179
  try:
180
+ result = qa_pipeline(prompt, max_new_tokens=1024, do_sample=False, temperature=0.1, top_k=50)
181
+ output = result[0]["generated_text"].strip()
182
+ if output.startswith("Assistant:"):
183
+ output = output.replace("Assistant:", "").strip()
 
 
 
 
 
184
 
185
+ if len(output.split()) < 15:
186
+ output += "\n\nRefer to: https://developer.salesforce.com/docs for more."
 
187
 
188
+ conversation_history.append((query, output))
189
  conversation_history = conversation_history[-6:]
190
+ log_to_console({"Question": query, "Answer": output}, "Chatbot Query")
191
+ return output
 
192
  except Exception as e:
193
  return f"⚠️ Error generating response: {str(e)}"
194
 
195
  # ---------- Gradio UI ----------
 
 
196
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
197
  gr.Markdown("# 🤖 Advanced Salesforce AI Code Review & Chatbot")
198
 
 
236
  clear_button.click(fn=clear_chat, inputs=None, outputs=[chatbot_output, query_input])
237
 
238
  if __name__ == "__main__":
239
+ demo.launch()