# ./core_logic.py -> Token-safe

import os
from groq import Groq
from tools import web_search, parse_file

client = Groq(api_key=os.getenv("GROQ_API_KEY"))
model = "llama-3.1-8b-instant"

# Compressed for token efficiency
SYSTEM_PROMPT = (
    "You're a Full-stack AI Engineering Genius. "
    "Expert in Python (latest production version), Agentic Loops, and FastAPI, NodeJS, HTML, CSS. "
    "Provide production-ready code with needed comments. Analyze files when provided. Be concise."
)
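
# A rough token estimator (an illustrative helper, not part of the original
# module; assumption: ~4 characters per token for typical English text).
# The 12,000-character cap inside chat_function below corresponds to roughly
# estimate_tokens(context) <= 3000.
def estimate_tokens(text: str) -> int:
    """Approximate token count using the ~4 chars/token heuristic."""
    return len(text) // 4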

def chat_function(message, history):
    user_text = message.get("text", "")
    files = message.get("files", [])
    
    # 1. Process files, then cap their combined size
    context_from_files = ""
    for f in files:
        # Gradio may deliver files as {"path": ...} dicts or as plain path strings
        path = f["path"] if isinstance(f, dict) else f
        file_content = parse_file(path)
        context_from_files += file_content
    
    # Truncate file context to ~3,000 tokens (~12,000 chars at ~4 chars/token)
    if len(context_from_files) > 12000:
        context_from_files = context_from_files[:12000] + "\n...[File Content Truncated for TPM Limits]..."

    # 2. Research Trigger
    if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
        research_context = web_search(user_text)
        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
    else:
        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"

    # 3. Build Messages with History Slicing
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    
    # Keep only the last 3 history messages: the key lever for staying under the 6k TPM limit
    for turn in history[-3:]:
        messages.append({"role": turn["role"], "content": turn["content"]})
        
    messages.append({"role": "user", "content": prompt})

    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=0.0,
            # max_tokens=1024,  # optional: cap response size to avoid mid-stream cutoffs
        )

        response_text = ""
        for chunk in completion:
            if chunk.choices and chunk.choices[0].delta.content:
                token = chunk.choices[0].delta.content
                response_text += token
                yield response_text
    except Exception as e:
        yield f"Error: {str(e)}"