Krishna172912 committed on
Commit
9d84f4e
·
unverified ·
1 Parent(s): 76a0948

Create config.py

Browse files
Files changed (1) hide show
  1. back_end/config.py +367 -0
back_end/config.py ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Allow-list of file extensions, bucketed under a size-limit label.
# NOTE(review): the "limit_<N>kb" / "no_limit" key names suggest a per-file
# size cap enforced by the consumer of this table -- confirm against the caller.
SUPPORTED_TYPES = {
    "limit_20kb": {".json", ".xml", ".csv", ".tsv", ".jsonl"},
    "limit_30kb": {".css", ".scss", ".sass", ".less"},
    "limit_50kb": {".yaml", ".yml"},
    "limit_2048kb": {".pdf"},
    "no_limit": (
        # Documentation and plain text
        {".md", ".mdx", ".txt", ".rst", ".asciidoc", ".adoc", ".tex"}
        # Configuration and infrastructure
        | {".toml", ".ini", ".cfg", ".conf", ".properties", ".hcl", ".tf", ".tfvars"}
        | {".gitignore", ".dockerignore", ".editorconfig", ".nvmrc", ".npmignore"}
        # Web, UI and templating
        | {".html", ".js", ".jsx", ".ts", ".tsx", ".vue", ".svelte", ".astro", ".php"}
        | {".handlebars", ".hbs", ".ejs", ".pug", ".twig", ".liquid"}
        # Systems and general-purpose languages
        | {
            ".c", ".h", ".cpp", ".hpp", ".cc", ".cxx",
            ".rs", ".go", ".zig", ".nim", ".asm", ".s",
        }
        | {".java", ".cs", ".kt", ".kts", ".scala", ".groovy"}
        | {".py", ".rb", ".pl", ".pm", ".lua", ".r"}
        # Functional languages and mobile / Apple platforms
        | {".hs", ".ml", ".mli", ".clj", ".cljs", ".cljc", ".ex", ".exs"}
        | {".jl", ".swift", ".m", ".dart"}
        # Web3 and hardware description
        | {".sol", ".v"}
        # Shell scripts and notebooks
        | {".sh", ".bash", ".zsh", ".bat", ".cmd", ".ps1", ".ipynb"}
    ),
}
43
+
44
# Glob patterns for paths that are excluded.  Directory names are expanded to
# "**/<name>/**" and bare file names to "**/<name>"; list order matches the
# original hand-written literal.
EXCLUDE_PATTERNS = [
    *(
        f"**/{folder}/**"
        for folder in (
            # Version-control metadata
            ".git", ".svn", ".hg",
            # Third-party dependencies and virtual environments
            "node_modules", "bower_components",
            "venv", ".venv", "env", "python_env",
            "vendor", "deps", "packages", "Pods",
            # Build outputs
            "dist", "build", "out", "target",
            "bin", "obj", "_build",
            # Framework / tooling state
            ".next", ".nuxt", ".svelte-kit",
            ".gradle", ".mvn",
            ".dart_tool", ".pub-cache",
            ".serverless",
            # Python, test and linter caches
            "__pycache__", ".pytest_cache", ".tox",
            ".mypy_cache", ".ruff_cache",
            "*.egg-info",
            # Coverage reports
            "coverage", ".nyc_output",
        )
    ),
    # Log files
    "**/*.log",
    # IDE settings and OS junk
    *(f"**/{folder}/**" for folder in (".vscode", ".idea", ".vs")),
    "**/.DS_Store",
    "**/thumbs.db",
    # Temporary directories
    "**/tmp/**",
    "**/temp/**",
    # File-level exclusions: ignore files, env files and tool rc files
    *(
        f"**/{filename}"
        for filename in (
            ".gitignore",
            ".dockerignore",
            ".npmignore",
            ".env",
            ".env.*",
            ".editorconfig",
            ".prettierrc",
            ".eslintrc",
            ".stylelintrc",
        )
    ),
]
91
+
92
+ # Markers that indicate an auto-generated file
93
# Substrings whose presence hints that a file was produced by a tool.
# Every entry is lower-case.
# NOTE(review): presumably the consumer lower-cases file text before the
# substring check -- confirm against the caller.
AUTO_GENERATED_MARKERS = [
    # --- Generic wording, any language ---
    "this file is auto-generated",
    "this file was auto-generated",
    "this file is automatically generated",
    "this file was automatically generated",
    "auto-generated by",
    "auto generated by",
    "automatically generated by",
    "generated automatically",
    "do not edit this file",
    "do not edit - generated",
    "do not modify this file",
    "do not modify - generated",
    "changes will be overwritten",
    "any changes made to this file will be lost",
    "any manual changes will be overwritten",
    "regenerate this file",
    # --- Protobuf / gRPC ---
    "generated by protoc",
    "generated by the protocol buffer compiler",
    *(f"generated by protoc-gen-{plugin}" for plugin in ("go", "grpc", "ts", "js")),
    "source: proto/",  # path hint seen in protoc headers
    # --- OpenAPI / Swagger ---
    *(
        f"generated by {tool}"
        for tool in ("openapi", "swagger", "swagger-codegen", "openapi-generator")
    ),
    "do not edit the generated code",
    # --- GraphQL ---
    "generated by graphql-codegen",
    "generated by graphql code generator",
    "@generated graphql",  # pragma used by relay / graphql-codegen
    "/* eslint-disable */",  # frequently prepended by code generators
    # --- Go tooling ---
    "code generated by go generate",
    "// code generated",  # Go convention for generated files
    "// generated by",
    "do not edit.",  # closing phrase of the Go generated-file notice
    # --- Rust (build.rs / prost / tonic) ---
    "// @generated",
    "generated by prost",
    "generated by tonic",
    # --- Java / Kotlin ---
    "@javax.annotation.generated",
    "@jakarta.annotation.generated",
    # "room" covers Android Room DAO implementations
    *(f"generated by {tool}" for tool in ("dagger", "hilt", "room", "kapt", "ksp")),
    # --- C# / .NET ---
    "<autogenerated>",  # Visual Studio designer files
    "// <auto-generated>",
    "// <autogenerated />",
    'tool = "resgen"',
    "generated by microsoft",
    "generated by dotnet",
    "this code was generated by a tool",  # standard .NET header
    # --- TypeScript / JavaScript ---
    "// @ts-nocheck",  # weak signal on its own; combine with others
    "generated by ts-proto",
    "generated by typechain",
    "generated by wagmi",
    "this is a generated file",
    "@auto-generated",
    # --- Python ---
    "# generated by",
    "# this file was generated by",
    "# auto-generated",
    "# do not edit",
    "generated by grpc_tools",
    "generated by betterproto",
    "generated by datamodel-codegen",
    "generated by sqlalchemy",  # alembic migration hint
    # --- Build systems / IDEs ("freezed" and "json_serializable" are Dart/Flutter) ---
    *(
        f"generated by {tool}"
        for tool in (
            "cmake",
            "bazel",
            "buck",
            "gradle",
            "xcode",
            "android studio",
            "flutter",
            "freezed",
            "json_serializable",
        )
    ),
    # --- Miscellaneous tools ---
    *(
        f"generated by {tool}"
        for tool in (
            "prisma",
            "drizzle",
            "sqlc",
            "buf",  # buf.build protobuf toolchain
            "mockery",  # Go mock generator
            "moq",
            "wire",  # Google Wire DI
            "copier",
            "stringer",  # Go stringer tool
            "easyjson",
        )
    ),
    "lint: disable",  # weak signal; combine with the file extension
    # --- Pragma-style, language-agnostic ---
    "@generated",  # used by Hack, Flow and some JS tools
    "/* generated */",
    "// generated",
]
210
+
211
+ # Extensions to scan for auto-gen headers
212
AUTO_GEN_SCAN_EXTENSIONS = {
    # Single-part extensions
    ".py", ".ts", ".js", ".cs", ".java", ".kt", ".go", ".rs",
    ".tsx", ".jsx",  # React code-gen (relay, graphql-codegen)
    ".dart",  # Flutter / freezed
    ".proto",  # protobuf definitions themselves
    ".h", ".cpp", ".cc",  # C/C++ code-gen (flatbuffers, protobuf, ...)
    ".swift",  # Xcode / SwiftGen / Sourcery
    ".rb",  # Rails generators
    ".php",  # Doctrine, Symfony generators
} | {
    # Compound suffixes that conventionally mark generated output.
    # NOTE(review): these only match if the consumer compares the full
    # multi-part suffix rather than the last extension -- confirm.
    ".pb.go", ".pb.ts",  # compiled proto output
    ".g.cs", ".designer.cs",  # .NET generated suffixes
    ".g.dart",  # Flutter generated
    ".generated.ts",  # convention-based suffix
}
228
+
229
# Maps a file extension to the language identifier used for AST-based
# splitting.  Built from (language, extensions) pairs so each language is
# spelled once; insertion order matches the original literal.
# NOTE(review): the identifiers look like parser/grammar names expected by the
# splitter library -- confirm against its documentation.
AST_BASED_SPLITTING = {
    extension: language
    for language, extensions in (
        # General-purpose programming
        ("c", (".c", ".h")),
        ("cpp", (".cpp", ".hpp", ".cc", ".cxx")),
        ("csharp", (".cs",)),
        ("dart", (".dart",)),
        ("go", (".go",)),
        ("java", (".java",)),
        ("javascript", (".js", ".jsx")),
        ("julia", (".jl",)),
        ("kotlin", (".kt", ".kts")),
        ("nim", (".nim",)),
        ("ocaml", (".ml", ".mli")),
        ("perl", (".pl", ".pm")),
        ("python", (".py",)),
        ("r", (".r",)),
        ("ruby", (".rb",)),
        ("rust", (".rs",)),
        ("scala", (".scala",)),
        ("swift", (".swift",)),
        ("typescript", (".ts",)),
        ("tsx", (".tsx",)),  # TSX is keyed separately from TypeScript
        ("zig", (".zig",)),
        # Web, UI and markup
        ("html", (".html",)),
        ("css", (".css",)),
        ("scss", (".scss",)),
        ("astro", (".astro",)),
        ("vue", (".vue",)),
        ("svelte", (".svelte",)),
        ("xml", (".xml",)),
        ("yaml", (".yaml", ".yml")),
        # Config and DevOps
        ("bash", (".sh", ".bash", ".zsh")),
        ("gitignore", (".gitignore",)),
        # Systems and low-level
        ("asm", (".asm", ".s")),
        ("verilog", (".v",)),
    )
    for extension in extensions
}
271
+
272
# Chunking parameters.
# NOTE(review): the unit (characters vs. tokens) is decided by the splitter
# that consumes these values -- confirm against the caller.
CHUNK_SIZE: int = 2048
CHUNK_OVERLAP: int = 200

# Settings named for the Chroma store: a relative persistence path and the
# collection name.
CHROMA_PERSIST_DIR: str = "./generated_chroma_database"
CHROMA_COLLECTION_NAME: str = "vector_db"
278
+
279
+
280
+
281
+
282
# Opening section of the researcher agent's system prompt: states the agent's
# role and who it reports to.  Written as adjacent string literals; the
# resulting value starts and ends with a newline, as the original did.
AGENT_SYSTEM_PROMPT_HEADER = (
    "\n"
    "You are a Junior Code Researcher working in the backend of a Multi-Agent RAG system. "
    "You have access to tools to access a locally stored codebase.\n"
    "**YOUR ROLE & AUDIENCE:**\n"
    "- You explore the repository using tools to find precise answers.\n"
    "- You do NOT interact with the end-user directly.\n"
    "- You report exclusively to the Lead Code Architect (Supervisor).\n"
    "- Your job is to do the heavy lifting: use tools to explore, read files, gather context, "
    "and decide when you have enough raw data for the Supervisor to formulate a response.\n"
)
290
+
291
# Tool-selection section of the researcher agent's system prompt, covering the
# exact, keyword and semantic search tools plus directory/file helpers.
# The text previously contained mis-decoded UTF-8 ("β€”", "β†’"); the intended
# em dash and right arrow are restored so the model receives clean text.
# The value starts with a newline and has no trailing newline.
AGENT_SYSTEM_PROMPT_TOOLS = """
## TOOL SELECTION — DECISION TREE
Work through this decision tree for EVERY search action:
1. **Do you have an exact string to find?** (function name, class name, variable)
→ Use `exact_code_search`. This is always your first move for anything concrete.
2. **Did exact search fail OR do you have related keywords?**
→ Use `keyword_code_search`. DO NOT USE QUOTES for multi-term strings (BM25 tokenizes input). Search single, distinct words: e.g., `database pool`, NOT `"database pool"`.
3. **Are you exploring an abstract concept?**
→ Use `semantic_code_search` with a natural language phrase. Use this LAST.
4. **Do you need to understand folder structure?**
→ Use `list_directory_contents`.
5. **Do you need to verify a file exists?**
→ Use `find_file_path_by_pattern`.
6. **Do you need to read a file's contents?**
→ Use `get_specific_file`."""
306
+
307
# Variant of the tool-selection prompt for when vector search is disabled
# (the prompt text itself says so): only exact search and structural
# exploration tools are listed.
# The text previously contained mis-decoded UTF-8 ("β€”", "β†’"); the intended
# em dash and right arrow are restored so the model receives clean text.
# The value starts with a newline and has no trailing newline.
AGENT_SYSTEM_PROMPT_TOOLS_NO_DB = """
## TOOL SELECTION — DECISION TREE
Work through this decision tree for EVERY search action. (Note: Vector search is currently disabled; rely on exact matching and structural exploration):
1. **Do you have an exact string to find?** (function name, class name, variable)
→ Use `exact_code_search`. This is your primary discovery tool. Think of distinct, unique variable or function names to grep for.
2. **Do you need to verify a file exists or find files by extension/name?**
→ Use `find_file_path_by_pattern`. Use this to locate config files, routes, or models when exact code strings aren't obvious.
3. **Do you need to understand folder structure or find where components live?**
→ Use `list_directory_contents`.
4. **Do you need to read a file's contents?**
→ Use `get_specific_file`."""
318
+
319
# Closing section of the researcher agent's system prompt: file-reading rules,
# search-efficiency rules, and the exact final-output format.
# The text previously contained mis-decoded UTF-8 ("β€”", "Β±", "β€“"); the
# intended em dash, plus-minus sign and en dash are restored so the model
# receives clean text.  The value starts and ends with a newline.
AGENT_SYSTEM_PROMPT_FOOTER = """
## READING FILES — STRICT RULES
- **Never read a full file blindly.** First use `exact_code_search` to locate the relevant lines, then call `get_specific_file` with a ±100 line buffer.
- **TRUNCATION PROTOCOL (CRITICAL):** If any tool output ends with "Output is truncated", you MUST paginate using `start_line` and `end_line` before drawing conclusions. NEVER make a claim about what a file does or does not contain from a truncated read.
- Never guess line ranges like 1–200.

## SEARCH EFFICIENCY RULES
- **HIGH-VALUE LEAD PROTOCOL:** If you find a struct field, trait method, or config key that is directly relevant (e.g., `idempotent_hint`), you MUST read its full definition and every call site before moving on. Do not pivot after one partial read.
- **DEAD END ESCALATION:** If you have searched for the same concept 3+ times with no useful result, explicitly state: "CONCEPT NOT EXPLICITLY DOCUMENTED — best available evidence is [X]" and stop searching for it. Do not retry with minor keyword variations.
- Limit parallel exploration. Follow one lead to completion, then pivot.

**REASONING & ANSWER PROTOCOL:**
- Do not exceed 10 tool calls per query without pausing to reassess.
- If you receive "SUPERVISOR FEEDBACK:", your previous research was incomplete. Do not apologize. Read the instructions and find exactly what is missing.

**YOUR FINAL OUTPUT:**
Once you have gathered enough information, you must immediately stop.
OUTPUT FORMAT: You must output ONLY this exact format. Do not add summaries, insights, or explanations.

[RESEARCH COMPLETE]
Files read:
- <file_path_1>
- <file_path_2>
...
"""
344
+
345
+
346
# System prompt for the supervising "Lead Code Architect": division of labour
# with the researcher, rejection triggers, and the ACCEPT/REJECT outcomes.
# Written as adjacent string literals, one prompt line per group; the
# resulting value starts and ends with a newline, as the original did.
SUPERVISOR_SYSTEM_PROMPT = (
    "\n"
    "You are the Lead Code Architect in a Multi-Agent RAG system.\n"
    "Our platform helps users understand public code libraries.\n"
    "\n"
    "*** CRITICAL DIVISION OF LABOR (READ CAREFULLY) ***\n"
    '- The Junior Researcher (the agent) is ONLY a "Retriever". '
    'Their job is to call tools and output "[RESEARCH COMPLETE]". '
    "They are strictly forbidden from writing summaries.\n"
    '- YOU are the "Synthesizer". '
    "If the raw tool outputs in the message history contain enough information, "
    'YOU must write the final answer for the user and set status="ACCEPT".\n'
    "- DO NOT reject the researcher just because their final message is a short list of files. "
    "That is by design! "
    "Evaluate them based on the RAW TOOL OUTPUTS above their final message.\n"
    "\n"
    "*** MANDATORY REJECTION TRIGGERS ***\n"
    'Output status="REJECT" if ANY of the following are true:\n'
    "- The agent treated a truncated file as complete without paginating.\n"
    "- The user asked for code/syntax, but the tool outputs only show file names.\n"
    "- The agent ignored a high-value lead without checking its call sites.\n"
    "\n"
    "*** EVALUATION PATHS ***\n"
    '1. SUCCESS: Set status="ACCEPT". '
    "Write an exhaustive, highly detailed response addressed to the user.\n"
    '2. REWORK: Set status="REJECT". '
    "Write strict, non-repetitive, targeted feedback addressed to the Junior Researcher. "
    "DO NOT address the user.\n"
)
365
+
366
+
367
# File-count threshold tied to vector-database creation.
# NOTE(review): presumably repositories above this count skip the vector DB
# (cf. the "no DB" tool prompt) -- confirm against the caller.
MAX_FILES_TO_CREATE_VECTOR_DB: int = 6_000