# ./core_logic.py -> Token-safe
import os

from groq import Groq

from tools import web_search, parse_file

client = Groq(api_key=os.getenv("GROQ_API_KEY"))
model = "llama-3.1-8b-instant"

# Compressed for token efficiency
SYSTEM_PROMPT = (
    "You're a Full-stack AI Engineering Genius. "
    "Expert in Python (latest production version), agentic loops, FastAPI, NodeJS, HTML, and CSS. "
    "Provide production-ready code with needed comments. Analyze files when provided. Be concise."
)


def chat_function(message, history):
    user_text = message.get("text", "")
    files = message.get("files", [])

    # 1. Process files, accumulating their text into a single context string.
    context_from_files = ""
    for f in files:
        # Files may arrive as dicts ({"path": ...}) or as plain path strings.
        path = f["path"] if isinstance(f, dict) else f
        context_from_files += parse_file(path)

    # TRUNCATE FILE CONTEXT: cap at ~3,000 tokens (approx. 12,000 chars) to stay under TPM limits.
    if len(context_from_files) > 12000:
        context_from_files = (
            context_from_files[:12000]
            + "\n...[File Content Truncated for TPM Limits]..."
        )

    # 2. Research trigger: only call web search when the user asks for it.
    if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
        research_context = web_search(user_text)
        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
    else:
        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"

    # 3. Build messages with history slicing.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    # ONLY KEEP THE LAST 3 MESSAGES: the 'Master Stroke' for staying under the 6k TPM limit.
    # (Note: history[-3:] slices individual messages, not full user/assistant turns.)
    for turn in history[-3:]:
        messages.append({"role": turn["role"], "content": turn["content"]})
    messages.append({"role": "user", "content": prompt})

    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=0.0,
            max_tokens=1024,  # Limit response size to prevent mid-stream cuts
        )
        response_text = ""
        for chunk in completion:
            if chunk.choices and chunk.choices[0].delta.content:
                response_text += chunk.choices[0].delta.content
                yield response_text  # Stream the growing response back to the UI
    except Exception as e:
        yield f"Error: {e}"
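

# --- Usage sketch (assumption, not part of the original module) ---------------
# chat_function's shape (a generator taking a {"text", "files"} message dict plus
# a role/content history list and yielding a growing string) matches Gradio's
# multimodal ChatInterface, so a minimal wiring could look like the block below.
# Gradio itself is an assumption here, and the title is hypothetical; verify the
# ChatInterface arguments against your installed version before relying on this.
if __name__ == "__main__":
    import gradio as gr  # assumed UI layer; swap in your own front end if needed

    demo = gr.ChatInterface(
        fn=chat_function,
        type="messages",  # history arrives as [{"role": ..., "content": ...}]
        multimodal=True,  # message arrives as {"text": ..., "files": [...]}
        title="Token-safe Groq Assistant",  # hypothetical title
    )
    demo.launch()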