Spaces:
Sleeping
Sleeping
| # ./core_logic.py -> Token-safe | |
| import os | |
| import re # Added for structural artifact code block extraction | |
| from groq import Groq | |
| from tools import web_search, parse_file | |
| import yaml | |
| import toml | |
| from docx import Document | |
# Module-wide Groq client; the API key is read from the GROQ_API_KEY
# environment variable (None is passed through when it is unset).
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Model id used by chat_function for the main streamed completion.
model = "llama-3.1-8b-instant"
def verify_permissions(directory="."):
    """Check that a file can be created, written and removed in *directory*.

    The probe is a uniquely named temporary file, so an existing file is never
    overwritten and nothing is left behind when the check finishes.

    Args:
        directory: Directory to probe. Defaults to the current working
            directory, which is what the import-time check below exercises;
            pass "outputs" to probe the staging directory that
            chat_function writes into.

    Returns:
        bool: True when the probe file was written and removed, False when
        the operating system refused any step (the reason is printed).
    """
    # Function-scope import: tempfile is not part of the module's import block.
    import tempfile

    try:
        with tempfile.NamedTemporaryFile(
            mode="w",
            dir=directory,
            prefix="permission_test_",
            suffix=".txt",
        ) as probe:
            probe.write("test")
            # Force the write to reach the file system before the file is
            # closed and deleted on leaving the "with" block.
            probe.flush()
    except OSError as e:
        print(f"❌ PERMISSION ERROR: {e}")
        return False

    print("✅ Write permissions verified.")
    return True


# Import-time sanity check of the working directory (result is only printed).
verify_permissions()
# Legacy compact prompt (compressed for token efficiency), kept for reference:
#SYSTEM_PROMPT = (
# "You're a Full-stack AI Engineering Genius. "
# "Expert in Python (latest production version), Agentic Loops, and FastAPI, NodeJS, HTML, CSS. "
# "Provide production-ready code with needed comments. Analyze files when provided. Be concise."
#)

# Active system prompt. chat_function sends it as the first message of every
# request, so its length counts against the per-request token budget.
# The CORE DIRECTIVES list is numbered 1-14 without gaps or repeats.
SYSTEM_PROMPT = """
You are the 'Silicon Architect'—a master-stroke Full-stack AI Engineering and Technical Architecture Dev-Ops, and a Knowledgeable, Socratic-Inquirer, Instructor.
Your goal is to provide production-grade, highly optimized solutions for web and mobile AI and Agentic applications.
Expertise:
. Python (latest production version), Agentic Loops, FastAPI, Scalable Architecture.
. Provide production-ready code with appropriate comments, based in rigorous technical research.
. Analyze provided files thoroughly; propose suitable recommendations
. Be sharp, precise, concise.
CORE DIRECTIVES:
1. ARCHITECTURAL RIGOR: Always consider scalability, async patterns, and state management.
2. AGENTIC EXPERTISE: You understand recurrent-depth simulations, tool-calling, and autonomous loops.
3. CODE QUALITY: Write clean, PEP 8 compliant, appropriately commented upon, secure Python/JS code.
4. FIRST PRINCIPLES: Base your responses and reasoning in Richard Feynman’s first principles thinking. Break down complex problems into fundamental truths and reason up from there
5. PRIORITIZE ESSENTIALS: Focused on - the "must haves" before the "good to have" - having the fundamentals worked-out/implemented; stay clear of over-engineering
6. OCKHAM'S RAZOR: Prefer simple yet robust and scalable solutions without compromising on needed deliverables.
7. INNOVATION: Suggest latest libraries and frameworks (FastAPI, LangGraph, Pydantic AI; but not limited to these).
8. TAVILY WEB SEARCH: This has max 400 characters limit, so be concise and strategic in keyword selection; use the micro-turn distillation technique to compact and optimize the search query.
9. ACTIVE CONTRIBUTOR: Actively recommend enhancements yet without jeopardizing the core requirements; the point is to be proactive in identifying potential improvements and optimizations.
10. FORESIGHT INSIGHT: Anticipate potential pitfalls and edge cases, have them all proactively addressed in your solutions.
11. RESEARCH: If the user asks about new tech, use your Web Search capability to provide factual, up-to-date documentation.
12. ERROR HANDLING: Always include robust error handling, write descriptive error messages that include the offending value.
13. SECURITY: Always consider security implications, and implement best practices to mitigate vulnerabilities (e.g., input validation, sanitization, secure defaults).
14. README.md: While working on projects, prepare and maintain - for each project - a README.md outlining:
. project scope,
. requirements,
. expected outcome,
. core tools and tech-stack employed,
. UML, Flowcharts, Block-diagrams, and other graphics as applicable,
. a brief explanation of each module/file (such *.py, *.html, *.css, *.js, etc.) in the project, with
. details about functionalities implemented and working, and about pending/planned implementations,
. other relevant details of use to the DEV team;
. iterate the foundational README.md as the project progresses, ensuring it aligns with the latest functional state of the project, and maintain a copy of the last updated README.md with the addition of suffix "_-1", such that README_-1.md.
PERSONALITY:
1. POLITE & ASSERTIVE : Disagree with the user, if needed; never resort to sycophancy.
2. INQUIRE: Formulate necessary questions as deemed fit, suggest better alternatives when need be.
3. PROFESSIONALISM: You're a Senior AI Solutions Architect, maintain a technical excellence of one professional, grounded, humane.
When a user provides files, analyze the requirement, structure, logic before proposing changes.
"""
def _sanitize_search_query(raw_query, max_chars=390):
    """Reduce an LLM-produced search query to plain, length-capped keywords.

    Args:
        raw_query: Text returned by the distillation call; None is treated as
            an empty string.
        max_chars: Upper bound on the result length (390 keeps the query
            under the 400-character search limit this module targets).

    Returns:
        str: The query without code fences, quotes, backslashes or markdown
        punctuation, with all whitespace collapsed to single spaces.
    """
    text = raw_query or ""
    # Drop code-fence markers together with an attached language tag
    # (e.g. "```python") so that ordinary keywords such as "python" survive.
    text = re.sub(r"```[A-Za-z0-9_+-]*", " ", text)
    # A model may emit the two-character sequence backslash + "n" instead of
    # a real newline; turn it into a separator before backslashes are removed.
    text = text.replace("\\n", " ")
    # Delete quotes, backslashes and markdown punctuation. The class holds no
    # bare letters: a stray "n" here would strip every "n" from the query.
    text = re.sub(r"[`'\"\\\-*#\[\]]", "", text)
    # Real newlines/tabs and repeated blanks become single spaces.
    text = " ".join(text.split())
    if len(text) > max_chars:
        # rsplit keeps the clipped text when it contains no space, whereas
        # rpartition(" ")[0] would collapse it to an empty string.
        text = text[:max_chars].rsplit(" ", 1)[0]
    return text.strip()


def chat_function(message, history):
    """Stream an assistant reply for one chat turn and stage code artifacts.

    Generator with four stages: (1) read uploaded files through the
    perception agent, (2) optionally distil the request into a short web
    search and fetch research context, (3) stream the model completion,
    (4) when the reply contains a fenced code block, write it to
    outputs/COURSE_README.md after copying the previous version to
    outputs/COURSE_README_-1.md.

    Args:
        message: Mapping read via ``.get("text")`` and ``.get("files")``;
            each file entry is a dict with a "path" key or a plain path.
        history: Sequence of earlier messages, each indexable by "role" and
            "content". Only the last three entries are forwarded.

    Yields:
        str: Status lines during file ingestion, then the cumulative response
        text after every streamed token, then the response with a staging
        status appended. On failure in stage 3 or 4 the text streamed so far
        (if any) followed by "Error: <reason>".

    Raises:
        Exception: Errors from the file-ingestion or research stage are not
        caught here and propagate to the caller.
    """
    user_text = message.get("text", "")
    files = message.get("files", [])

    # 1. Process uploaded files via the perception agent.
    file_parts = []
    if files:
        from perception_agent import read_document_file

        yield "◌ _Perception Agent initialized: Ingesting uploaded file assets..._"
        for f in files:
            # File entries arrive either as dicts with a 'path' key or as flat strings.
            path = f["path"] if isinstance(f, dict) else f
            if path and os.path.exists(path):
                file_parts.append(read_document_file(path))
        yield "◌ _Perception processing complete. Transmitting compiled structures to the Brain..._"
    context_from_files = "".join(file_parts)

    # Truncate file context: max ~3000 tokens (approx. 12,000 chars).
    if len(context_from_files) > 12000:
        context_from_files = context_from_files[:12000] + "\n...[File Content Truncated for TPM Limits]..."

    # 2. Research trigger: any of these keywords in the request enables web search.
    lowered_text = user_text.lower()
    if any(keyword in lowered_text for keyword in ("search", "docs", "latest")):
        # Fast micro-turn that distils the user prompt into search keywords.
        distill_response = client.chat.completions.create(
            model="llama-3.1-8b-instant",
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are a search query optimizer tool. Your ONLY job is to take the user's long request and turn it into a short, effective, plain-text, web search query for finding relevant technical programming documentation.\n\n"
                        "Critical Rules:\n"
                        "1. Do NOT answer the user's prompt.\n"
                        "2. Do NOT write code blocks, code explanations, tasks, or JSON data structures.\n"
                        "3. Your entire output must be a single sentence under 50 characters.\n"
                        "4. If the user provides a code file or raw data logs, ignore the text content and generate a query searching for the underlying concept (e.g., 'Scapy network sniffing documentation python').\n"
                        "5. Output ONLY raw keywords.\n"
                        "6. NEVER use markdown, backticks, or code blocks.\n"
                        "7. NEVER wrap your output in single or double quotes.\n"
                        "8. Maximum 5 words, under 50 characters total."
                    )
                },
                {
                    "role": "user",
                    "content": f"Convert the following request into raw optimized search keywords based on your system rules:\n\n{user_text}"
                }
            ],
            temperature=0.0,
        )
        raw_query = distill_response.choices[0].message.content
        # Fall back to the (sanitized, clipped) user text when the model
        # returned nothing usable, so the search is never issued empty-handed.
        optimized_query = _sanitize_search_query(raw_query) or _sanitize_search_query(user_text)
        print(f"\nlen optimized_query: {len(optimized_query)}")  # Debug log for query length
        print(f"\nOptimized Search Query: '{optimized_query}'")  # Debug log for the optimized query

        research_context = web_search(optimized_query)
        print(f"\nResearch Context Retrieved: {research_context}...")  # Debug log for research context

        # NOTE(review): the main model receives only the distilled keywords as
        # USER text, not the original request. This looks like a deliberate
        # token-saving choice (the user_text variant was commented out) —
        # confirm that losing the user's actual instructions is acceptable.
        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {optimized_query}"
    else:
        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"

    # 3. Build messages with history slicing.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Only the last three history entries (individual messages, not
    # user/assistant pairs) are kept to stay under the TPM budget.
    for turn in history[-3:]:
        messages.append({"role": turn["role"], "content": turn["content"]})
    messages.append({"role": "user", "content": prompt})

    # Diagnostics: size of the message list sent to the provider.
    print("\n==================================================")
    print(f"📊 Sending {len(messages)} raw message blocks to the {model}.")
    print("==================================================\n")

    # Defined before the try so the error handler can show partial output.
    response_text = ""
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=0.2,
            #max_tokens=1024 # Limit response size to prevent mid-stream cuts
        )

        # Step 1: stream the raw LLM output token by token to the user.
        for chunk in completion:
            if chunk.choices and chunk.choices[0].delta.content:
                response_text += chunk.choices[0].delta.content
                yield response_text

        # Artifact check: only replies containing a fenced code block
        # (text enclosed in triple backticks) trigger file generation.
        if re.search(r"```[\s\S]*?```", response_text):
            # Step 2: local file generation.
            yield response_text + "\n\n◌ _File agent initialized: Generating local documentation workspace..._"
            from file_agent import write_document
            import shutil

            filename = "COURSE_README.md"
            backup_filename = "COURSE_README_-1.md"

            # Keep one historical copy before the fresh file is written.
            src_path = os.path.join("outputs", filename)
            dst_path = os.path.join("outputs", backup_filename)
            if os.path.exists(src_path):
                try:
                    shutil.copy2(src_path, dst_path)
                except Exception as e:
                    # Best effort: a failed backup is logged, not fatal.
                    from agent_logging import log_agent_action
                    log_agent_action("BACKUP_ERROR", f"Failed to cycle historical version file: {str(e)}")

            file_path = write_document(response_text, filename)
            print(f"\nGenerated file at: {file_path}")

            # Step 3: report staging status. write_document presumably returns
            # a path on success and a message containing "Error" on failure —
            # verify against file_agent. A non-string result counts as failure
            # instead of raising TypeError on the membership test.
            if isinstance(file_path, str) and "Error" not in file_path:
                yield response_text + f"\n\n✅ _Files successfully generated in localized staging environment._\n\n◌ _Awaiting authorization control panel to push to GitHub._"
            else:
                yield response_text + f"\n\n❌ _File generation failed: {file_path}_"
    except Exception as e:
        # Keep whatever was already streamed visible; with nothing streamed
        # the output is exactly "Error: <reason>".
        streamed_prefix = f"{response_text}\n\n" if response_text else ""
        yield f"{streamed_prefix}Error: {str(e)}"