LiamKhoaLe committed on
Commit
ac4ae39
·
1 Parent(s): d32bc90

Change coder agent from Gemini Pro to Qwen Coder. Update code prompt enhancer.

Browse files
Files changed (2) hide show
  1. helpers/coder.py +199 -32
  2. utils/api/router.py +11 -0
helpers/coder.py CHANGED
@@ -1,17 +1,21 @@
1
  """
2
  helpers/coder.py
3
 
4
- Single-agent code generation using Gemini Pro. Produces files-by-files Markdown
5
- with per-file explanations. Designed to be called from report generation to
6
- attach code outputs to the appropriate subsection.
7
  """
8
 
 
9
  from typing import Optional
10
  from utils.logger import get_logger
11
  from utils.service.common import trim_text
12
 
13
  logger = get_logger("CODER", __name__)
14
 
 
 
 
15
 
16
  async def generate_code_artifacts(
17
  subsection_id: str,
@@ -23,36 +27,21 @@ async def generate_code_artifacts(
23
  nvidia_rotator,
24
  user_id: str = ""
25
  ) -> str:
26
- """Generate code (files-by-files) with explanations using Gemini Pro.
 
 
 
 
27
 
28
  Returns a Markdown string containing multiple code blocks. Each block is
29
  preceded by a heading like `File: path` and followed by a short
30
  explanation. The content is grounded in provided contexts.
31
  """
32
- from utils.api.router import generate_answer_with_model
33
-
34
- system_prompt = (
35
- "You are a senior software engineer. Generate production-quality code that fulfills the TASK,\n"
36
- "grounded strictly in the provided CONTEXT.\n"
37
- "Rules:\n"
38
- "- Output Markdown with multiple code blocks by file, each preceded by a short heading 'File: path'.\n"
39
- "- Prefer clear, minimal dependencies.\n"
40
- "- After each code block, add a concise explanation of design decisions.\n"
41
- "- Ensure coherent naming and imports across files.\n"
42
- "- If mentioning endpoints/APIs, ensure consistency across files.\n"
43
- "- Do not include meta text like 'Here is the code'. Start with the first file heading.\n"
44
- )
45
- user_prompt = (
46
- f"SUBSECTION {subsection_id}\nTASK: {task}\nREASONING: {reasoning}\n\n"
47
- f"CONTEXT (DOCUMENT):\n{trim_text(context_text or '', 6000)}\n\n"
48
- f"CONTEXT (WEB):\n{trim_text(web_context or '', 3000)}\n\n"
49
- "Produce the code files and explanations as specified."
50
- )
51
-
52
- selection = {"provider": "gemini", "model": "gemini-2.5-pro"}
53
 
54
- logger.info(f"[CODER] Generating code for subsection {subsection_id} (task='{task[:60]}...')")
55
- # Track analytics
 
56
  try:
57
  from utils.analytics import get_analytics_tracker
58
  tracker = get_analytics_tracker()
@@ -62,19 +51,119 @@ async def generate_code_artifacts(
62
  agent_name="coding",
63
  action="generate_code",
64
  context="report_coding",
65
- metadata={"subsection_id": subsection_id}
66
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  await tracker.track_model_usage(
68
  user_id=user_id,
69
- model_name=selection["model"],
70
- provider=selection["provider"],
71
  context="report_coding",
72
  metadata={"subsection_id": subsection_id}
73
  )
74
  except Exception:
75
  pass
76
- code_md = await generate_answer_with_model(selection, system_prompt, user_prompt, gemini_rotator, nvidia_rotator, user_id, "coding")
77
- code_md = (code_md or "").strip()
78
 
79
  if not code_md:
80
  logger.warning(f"[CODER] Empty code output for subsection {subsection_id}")
@@ -89,6 +178,84 @@ async def generate_code_artifacts(
89
  return code_md
90
 
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def extract_structured_code(markdown: str):
93
  """Extract structured code blocks from the Gemini output.
94
 
 
1
  """
2
  helpers/coder.py
3
 
4
+ Single-agent code generation using NVIDIA Qwen3 Coder model with Chain of Thought reasoning.
5
+ Produces files-by-files Markdown with per-file explanations. Designed to be called from
6
+ report generation to attach code outputs to the appropriate subsection.
7
  """
8
 
9
+ import os
10
  from typing import Optional
11
  from utils.logger import get_logger
12
  from utils.service.common import trim_text
13
 
14
  logger = get_logger("CODER", __name__)
15
 
16
+ # Get the NVIDIA coder model from environment
17
+ NVIDIA_CODER = os.getenv("NVIDIA_CODER", "qwen/qwen3-coder-480b-a35b-instruct")
18
+
19
 
20
  async def generate_code_artifacts(
21
  subsection_id: str,
 
27
  nvidia_rotator,
28
  user_id: str = ""
29
  ) -> str:
30
+ """Generate code (files-by-files) with explanations using NVIDIA Qwen3 Coder with CoT reasoning.
31
+
32
+ Enhanced workflow:
33
+ 1. Use NVIDIA_LARGE to analyze and enhance the task requirements
34
+ 2. Use NVIDIA_CODER to generate the actual code based on enhanced requirements
35
 
36
  Returns a Markdown string containing multiple code blocks. Each block is
37
  preceded by a heading like `File: path` and followed by a short
38
  explanation. The content is grounded in provided contexts.
39
  """
40
+ from utils.api.router import nvidia_large_chat_completion
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ logger.info(f"[CODER] Starting enhanced code generation for subsection {subsection_id} (task='{task[:60]}...')")
43
+
44
+ # Track analytics for the coding agent
45
  try:
46
  from utils.analytics import get_analytics_tracker
47
  tracker = get_analytics_tracker()
 
51
  agent_name="coding",
52
  action="generate_code",
53
  context="report_coding",
54
+ metadata={"subsection_id": subsection_id, "model": NVIDIA_CODER}
55
  )
56
+ except Exception:
57
+ pass
58
+
59
+ # Step 1: Use NVIDIA_LARGE to analyze and enhance the task requirements
60
+ logger.info(f"[CODER] Step 1: Analyzing task with NVIDIA_LARGE for subsection {subsection_id}")
61
+
62
+ analysis_system_prompt = (
63
+ "You are a senior software architect and technical lead. Your task is to analyze a coding requirement "
64
+ "and provide a comprehensive, enhanced specification that will be used by a code generation AI.\n\n"
65
+ "ANALYSIS REQUIREMENTS:\n"
66
+ "1. Break down the task into clear, actionable components\n"
67
+ "2. Identify potential technical challenges and solutions\n"
68
+ "3. Suggest appropriate technologies, frameworks, and patterns\n"
69
+ "4. Define clear requirements and constraints\n"
70
+ "5. Identify dependencies and relationships between components\n"
71
+ "6. Consider scalability, maintainability, and best practices\n\n"
72
+ "OUTPUT FORMAT:\n"
73
+ "Provide a structured analysis in the following format:\n"
74
+ "- **Task Analysis**: Clear breakdown of what needs to be implemented\n"
75
+ "- **Technical Requirements**: Specific technical specifications\n"
76
+ "- **Architecture Suggestions**: Recommended structure and patterns\n"
77
+ "- **Dependencies**: Required libraries, frameworks, or external services\n"
78
+ "- **Implementation Notes**: Key considerations for the implementation\n"
79
+ "- **Enhanced Task Description**: A refined, detailed task description for code generation"
80
+ )
81
+
82
+ analysis_user_prompt = (
83
+ f"ORIGINAL TASK: {task}\n"
84
+ f"ORIGINAL REASONING: {reasoning}\n"
85
+ f"SUBSECTION: {subsection_id}\n\n"
86
+ f"CONTEXT (DOCUMENT):\n{trim_text(context_text or '', 8000)}\n\n"
87
+ f"CONTEXT (WEB):\n{trim_text(web_context or '', 4000)}\n\n"
88
+ "Please analyze this coding task and provide a comprehensive enhancement that will guide the code generation process."
89
+ )
90
+
91
+ try:
92
+ enhanced_analysis = await nvidia_large_chat_completion(analysis_system_prompt, analysis_user_prompt, nvidia_rotator)
93
+ logger.info(f"[CODER] Task analysis completed for subsection {subsection_id}")
94
+
95
+ # Track NVIDIA_LARGE usage
96
+ try:
97
+ if tracker and user_id:
98
+ await tracker.track_model_usage(
99
+ user_id=user_id,
100
+ model_name="nvidia_large",
101
+ provider="nvidia_large",
102
+ context="code_analysis",
103
+ metadata={"subsection_id": subsection_id}
104
+ )
105
+ except Exception:
106
+ pass
107
+
108
+ except Exception as e:
109
+ logger.warning(f"[CODER] Task analysis failed for subsection {subsection_id}: {e}")
110
+ enhanced_analysis = f"**Task Analysis**: {task}\n**Technical Requirements**: {reasoning}\n**Enhanced Task Description**: {task}"
111
+
112
+ # Step 2: Use NVIDIA_CODER to generate code based on enhanced analysis
113
+ logger.info(f"[CODER] Step 2: Generating code with NVIDIA_CODER for subsection {subsection_id}")
114
+
115
+ # Enhanced system prompt with Chain of Thought reasoning
116
+ system_prompt = (
117
+ "You are a senior software engineer with expertise in code generation and architecture design.\n"
118
+ "Your task is to generate production-quality code based on the ENHANCED ANALYSIS provided below.\n\n"
119
+ "REASONING PROCESS (Chain of Thought):\n"
120
+ "1. First, analyze the enhanced requirements and constraints\n"
121
+ "2. Identify the key components and their relationships\n"
122
+ "3. Consider the context and any existing patterns or frameworks\n"
123
+ "4. Plan the code structure and architecture\n"
124
+ "5. Generate clean, maintainable code with proper error handling\n"
125
+ "6. Ensure code follows best practices and is production-ready\n\n"
126
+ "OUTPUT FORMAT:\n"
127
+ "- Output Markdown with multiple code blocks by file, each preceded by a short heading 'File: path'\n"
128
+ "- Prefer clear, minimal dependencies\n"
129
+ "- After each code block, add a concise explanation of design decisions\n"
130
+ "- Ensure coherent naming and imports across files\n"
131
+ "- If mentioning endpoints/APIs, ensure consistency across files\n"
132
+ "- Do not include meta text like 'Here is the code'. Start with the first file heading\n"
133
+ "- Include proper error handling, documentation, and testing considerations\n"
134
+ )
135
+
136
+ # Enhanced user prompt with the analysis results
137
+ user_prompt = (
138
+ f"SUBSECTION: {subsection_id}\n"
139
+ f"ENHANCED ANALYSIS:\n{enhanced_analysis}\n\n"
140
+ f"ORIGINAL CONTEXT (DOCUMENT):\n{trim_text(context_text or '', 6000)}\n\n"
141
+ f"ORIGINAL CONTEXT (WEB):\n{trim_text(web_context or '', 3000)}\n\n"
142
+ "Please follow this reasoning process:\n"
143
+ "1. Analyze the enhanced requirements and identify what needs to be implemented\n"
144
+ "2. Consider the provided context and any relevant patterns or frameworks\n"
145
+ "3. Plan the code structure, including file organization and dependencies\n"
146
+ "4. Generate clean, production-ready code with proper error handling\n"
147
+ "5. Ensure code follows best practices and is maintainable\n\n"
148
+ "Produce the code files and explanations as specified."
149
+ )
150
+
151
+ # Use the new NVIDIA coder function
152
+ code_md = await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator)
153
+ code_md = (code_md or "").strip()
154
+
155
+ # Track NVIDIA_CODER usage
156
+ try:
157
+ if tracker and user_id:
158
  await tracker.track_model_usage(
159
  user_id=user_id,
160
+ model_name=NVIDIA_CODER,
161
+ provider="nvidia_coder",
162
  context="report_coding",
163
  metadata={"subsection_id": subsection_id}
164
  )
165
  except Exception:
166
  pass
 
 
167
 
168
  if not code_md:
169
  logger.warning(f"[CODER] Empty code output for subsection {subsection_id}")
 
178
  return code_md
179
 
180
 
181
+ async def nvidia_coder_completion(system_prompt: str, user_prompt: str, nvidia_rotator) -> str:
182
+ """
183
+ NVIDIA Coder completion using the specified coder model with streaming support.
184
+ Uses the NVIDIA API rotator for key management and supports Chain of Thought reasoning.
185
+ """
186
+ key = nvidia_rotator.get_key() or ""
187
+ url = "https://integrate.api.nvidia.com/v1/chat/completions"
188
+
189
+ payload = {
190
+ "model": NVIDIA_CODER,
191
+ "messages": [
192
+ {"role": "system", "content": system_prompt},
193
+ {"role": "user", "content": user_prompt}
194
+ ],
195
+ "temperature": 0.7,
196
+ "top_p": 0.8,
197
+ "max_tokens": 4096,
198
+ "stream": True
199
+ }
200
+
201
+ headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
202
+
203
+ logger.info(f"[NVIDIA_CODER] API call - Model: {NVIDIA_CODER}, Key present: {bool(key)}")
204
+ logger.info(f"[NVIDIA_CODER] System prompt length: {len(system_prompt)}, User prompt length: {len(user_prompt)}")
205
+
206
+ try:
207
+ # For streaming, we need to handle the response differently
208
+ import httpx
209
+ async with httpx.AsyncClient(timeout=120) as client: # Longer timeout for code generation
210
+ response = await client.post(url, headers=headers, json=payload)
211
+
212
+ if response.status_code in (401, 403, 429) or (500 <= response.status_code < 600):
213
+ logger.warning(f"HTTP {response.status_code} from NVIDIA Coder provider. Rotating key and retrying")
214
+ nvidia_rotator.rotate()
215
+ # Retry once with new key
216
+ key = nvidia_rotator.get_key() or ""
217
+ headers = {"Content-Type": "application/json", "Authorization": f"Bearer {key}"}
218
+ response = await client.post(url, headers=headers, json=payload)
219
+
220
+ response.raise_for_status()
221
+
222
+ # Handle streaming response
223
+ content = ""
224
+ async for line in response.aiter_lines():
225
+ if line.startswith("data: "):
226
+ data = line[6:] # Remove "data: " prefix
227
+ if data.strip() == "[DONE]":
228
+ break
229
+
230
+ try:
231
+ import json
232
+ chunk_data = json.loads(data)
233
+ if "choices" in chunk_data and len(chunk_data["choices"]) > 0:
234
+ delta = chunk_data["choices"][0].get("delta", {})
235
+
236
+ # Handle reasoning content (thinking) for CoT
237
+ reasoning = delta.get("reasoning_content")
238
+ if reasoning:
239
+ logger.debug(f"[NVIDIA_CODER] Reasoning: {reasoning}")
240
+
241
+ # Handle regular content
242
+ chunk_content = delta.get("content")
243
+ if chunk_content:
244
+ content += chunk_content
245
+ except json.JSONDecodeError:
246
+ continue
247
+
248
+ if not content or content.strip() == "":
249
+ logger.warning(f"Empty content from NVIDIA Coder model")
250
+ return "I received an empty response from the model."
251
+
252
+ return content.strip()
253
+
254
+ except Exception as e:
255
+ logger.warning(f"NVIDIA Coder API error: {e}")
256
+ return "I couldn't process the request with NVIDIA Coder model."
257
+
258
+
259
  def extract_structured_code(markdown: str):
260
  """Extract structured code blocks from the Gemini output.
261
 
utils/api/router.py CHANGED
@@ -201,6 +201,17 @@ async def generate_answer_with_model(selection: Dict[str, Any], system_prompt: s
201
  logger.info("Falling back from NVIDIA_LARGE to NVIDIA_SMALL")
202
  fallback_selection = {"provider": "nvidia", "model": NVIDIA_SMALL}
203
  return await generate_answer_with_model(fallback_selection, system_prompt, user_prompt, gemini_rotator, nvidia_rotator)
 
 
 
 
 
 
 
 
 
 
 
204
 
205
  return "Unsupported provider."
206
 
 
201
  logger.info("Falling back from NVIDIA_LARGE to NVIDIA_SMALL")
202
  fallback_selection = {"provider": "nvidia", "model": NVIDIA_SMALL}
203
  return await generate_answer_with_model(fallback_selection, system_prompt, user_prompt, gemini_rotator, nvidia_rotator)
204
+ elif provider == "nvidia_coder":
205
+ # Use NVIDIA Coder for code generation tasks with fallback
206
+ try:
207
+ from helpers.coder import nvidia_coder_completion
208
+ return await nvidia_coder_completion(system_prompt, user_prompt, nvidia_rotator)
209
+ except Exception as e:
210
+ logger.warning(f"NVIDIA_CODER model failed: {e}. Attempting fallback...")
211
+ # Fallback: NVIDIA_CODER → NVIDIA_SMALL
212
+ logger.info("Falling back from NVIDIA_CODER to NVIDIA_SMALL")
213
+ fallback_selection = {"provider": "nvidia", "model": NVIDIA_SMALL}
214
+ return await generate_answer_with_model(fallback_selection, system_prompt, user_prompt, gemini_rotator, nvidia_rotator)
215
 
216
  return "Unsupported provider."
217