prashantmatlani committed on
Commit
3a187d4
·
1 Parent(s): 94e5bdb

token safe core_logic

Browse files
Files changed (2) hide show
  1. core_logic.py +32 -44
  2. core_logic_earlier.py +91 -0
core_logic.py CHANGED
@@ -6,75 +6,63 @@ The Inference Engine - Where the "Technical Genius" persona lives. It uses the h
6
  """
7
 
8
  import os
9
- from huggingface_hub import InferenceClient
10
- from tools import web_search, parse_file
11
  from groq import Groq
 
12
 
13
  client = Groq(api_key=os.getenv("GROQ_API_KEY"))
14
 
15
- # Recommended: Qwen2.5-Coder-32B or Llama-3.1-70B-Instruct
16
- #client = InferenceClient("deepseek-ai/DeepSeek-V4-Pro", token=os.getenv("HF_TOKEN"))
17
- #client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct", token=os.getenv("HF_TOKEN"))
18
- #client = InferenceClient("Qwen/Qwen2.5-Coder-7B-Instruct", token=os.getenv("HF_TOKEN"))
19
- #client = InferenceClient("llama-3.1-8b-instant", token=os.getenv("HF_TOKEN")) "llama-3.1-70b-versatile" -> GROQ API
20
- #client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=os.getenv("HF_TOKEN")) # Or "Qwen/Qwen2.5-72B-Instruct"
21
-
22
-
23
- SYSTEM_PROMPT = """
24
- You are the 'Silicon Architect'—a master-stroke creative genius in AI Engineering and Technical Architecture.
25
- Your goal is to provide production-grade, highly optimized solutions for web and mobile AI applications.
26
-
27
- Expertise: Python 3.12, Agentic Loops, FastAPI, and Scalable Architecture.
28
- Provide production-ready code and rigorous technical research.
29
-
30
- CORE DIRECTIVES:
31
- 1. ARCHITECTURAL RIGOR: Always consider scalability, async patterns, and state management.
32
- 2. AGENTIC EXPERTISE: You understand recurrent-depth simulations, tool-calling, and autonomous loops.
33
- 3. CODE QUALITY: Write clean, PEP 8 compliant, and secure Python/JS code.
34
- 4. INNOVATION: Suggest the latest libraries and frameworks (FastAPI, LangGraph, Pydantic AI; but not limited to these).
35
- 5. RESEARCH: If the user asks about new tech, use your Web Search capability to provide factual, up-to-date documentation.
36
-
37
- PERSONALITY:
38
- 1. FRANK/POLITE: Disagree with the user, if needed; never resort to sycophancy, and suggest better alternatives
39
- 2. HUMBLE: Apologize when mistaken
40
- 3. FIRST PRINCIPLES: Base your responses and reasoning in Richard Feynman’s first principles thinking. Break down complex problems into fundamental truths and reason up from there
41
-
42
- When a user provides files, analyze the code structure and logic before proposing changes.
43
- """
44
 
45
  def chat_function(message, history):
46
  user_text = message.get("text", "")
47
  files = message.get("files", [])
48
 
 
49
  context_from_files = ""
50
  for f in files:
51
  path = f["path"] if isinstance(f, dict) else f
52
- context_from_files += parse_file(path)
 
53
 
 
 
 
 
 
54
  if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
55
  research_context = web_search(user_text)
56
  prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
57
  else:
58
  prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"
59
 
 
60
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
61
 
62
- # Ensure history is in the correct format for the API
63
- for turn in history:
64
  messages.append({"role": turn["role"], "content": turn["content"]})
65
 
66
  messages.append({"role": "user", "content": prompt})
67
 
68
- response_text = ""
69
  try:
70
- #for chunk in client.chat_completion(messages, max_tokens=2048, stream=True, temperature=0.2):
71
- # --- Uncomment below for GROQ
72
- for chunk in client.chat.completions.create(model="llama-3.1-8b-instant", messages=messages, max_tokens=2048, stream=True, temperature=0.2): # Or model="llama-3.1-70b-versatile"
73
- # FIX: Check if choices exists and is not empty
74
- if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
 
 
 
 
 
 
75
  token = chunk.choices[0].delta.content
76
- if token:
77
- response_text += token
78
- yield response_text
79
  except Exception as e:
80
- yield f"Architecture Error: {str(e)}"
 
6
  """
7
 
8
  import os
 
 
9
  from groq import Groq
10
+ from tools import web_search, parse_file
11
 
12
  client = Groq(api_key=os.getenv("GROQ_API_KEY"))
13
 
14
+ # Compressed for token efficiency
15
+ SYSTEM_PROMPT = (
16
+ "You are 'Silicon Architect', an AI Engineering Genius. "
17
+ "Expert in Python (latest production version), Agentic Loops, and FastAPI, NodeJS, HTML, CSS. "
18
+ "Provide production-ready code. Analyze files first. Be concise."
19
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def chat_function(message, history):
22
  user_text = message.get("text", "")
23
  files = message.get("files", [])
24
 
25
+ # 1. Process Files with character limits
26
  context_from_files = ""
27
  for f in files:
28
  path = f["path"] if isinstance(f, dict) else f
29
+ file_content = parse_file(path)
30
+ context_from_files += file_content
31
 
32
+ # TRUNCATE FILE CONTEXT: Max ~3000 tokens (approx 12,000 chars)
33
+ if len(context_from_files) > 12000:
34
+ context_from_files = context_from_files[:12000] + "\n...[File Content Truncated for TPM Limits]..."
35
+
36
+ # 2. Research Trigger
37
  if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
38
  research_context = web_search(user_text)
39
  prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
40
  else:
41
  prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"
42
 
43
+ # 3. Build Messages with History Slicing
44
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
45
 
46
+ # ONLY KEEP LAST 3 TURNS: This is the 'Master Stroke' for staying under 6k TPM
47
+ for turn in history[-3:]:
48
  messages.append({"role": turn["role"], "content": turn["content"]})
49
 
50
  messages.append({"role": "user", "content": prompt})
51
 
 
52
  try:
53
+ completion = client.chat.completions.create(
54
+ model="llama-3.1-8b-instant",
55
+ messages=messages,
56
+ stream=True,
57
+ temperature=0.2,
58
+ max_tokens=1024 # Limit response size to prevent mid-stream cuts
59
+ )
60
+
61
+ response_text = ""
62
+ for chunk in completion:
63
+ if chunk.choices and chunk.choices[0].delta.content:
64
  token = chunk.choices[0].delta.content
65
+ response_text += token
66
+ yield response_text
 
67
  except Exception as e:
68
+ yield f"TPM/Rate Limit Error: {str(e)}"
core_logic_earlier.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # ./core_logic.py
3
+
4
+ """
5
+ The Inference Engine - Where the "Technical Genius" persona lives. It uses the Groq client (the huggingface_hub InferenceClient alternatives are kept only as commented-out lines below) to run the model without local CPU strain
6
+ """
7
+
8
+ import os
9
+ from huggingface_hub import InferenceClient
10
+ from tools import web_search, parse_file
11
+ from groq import Groq
12
+
13
+ client = Groq(api_key=os.getenv("GROQ_API_KEY"))
14
+
15
+ # Recommended: Qwen2.5-Coder-32B or Llama-3.1-70B-Instruct
16
+ #client = InferenceClient("deepseek-ai/DeepSeek-V4-Pro", token=os.getenv("HF_TOKEN"))
17
+ #client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct", token=os.getenv("HF_TOKEN"))
18
+ #client = InferenceClient("Qwen/Qwen2.5-Coder-7B-Instruct", token=os.getenv("HF_TOKEN"))
19
+ #client = InferenceClient("llama-3.1-8b-instant", token=os.getenv("HF_TOKEN")) "llama-3.1-70b-versatile" -> GROQ API
20
+ #client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=os.getenv("HF_TOKEN")) # Or "Qwen/Qwen2.5-72B-Instruct"
21
+
22
+
23
+ SYSTEM_PROMPT = """
24
+ You are the 'Silicon Architect'—a master-stroke creative genius in AI Engineering and Technical Architecture.
25
+ Your goal is to provide production-grade, highly optimized solutions for web and mobile AI applications.
26
+
27
+ Expertise: Python 3.12, Agentic Loops, FastAPI, and Scalable Architecture.
28
+ Provide production-ready code and rigorous technical research.
29
+
30
+ CORE DIRECTIVES:
31
+ 1. ARCHITECTURAL RIGOR: Always consider scalability, async patterns, and state management.
32
+ 2. AGENTIC EXPERTISE: You understand recurrent-depth simulations, tool-calling, and autonomous loops.
33
+ 3. CODE QUALITY: Write clean, PEP 8 compliant, and secure Python/JS code.
34
+ 4. INNOVATION: Suggest the latest libraries and frameworks (FastAPI, LangGraph, Pydantic AI; but not limited to these).
35
+ 5. RESEARCH: If the user asks about new tech, use your Web Search capability to provide factual, up-to-date documentation.
36
+
37
+ PERSONALITY:
38
+ 1. FRANK/POLITE: Disagree with the user, if needed; never resort to sycophancy, and suggest better alternatives
39
+ 2. HUMBLE: Apologize when mistaken
40
+ 3. FIRST PRINCIPLES: Base your responses and reasoning in Richard Feynman’s first principles thinking. Break down complex problems into fundamental truths and reason up from there
41
+
42
+ When a user provides files, analyze the code structure and logic before proposing changes.
43
+ """
44
+
45
def chat_function(message, history):
    """Produce a streamed answer for one incoming chat message.

    Builds a single prompt out of the user's text plus the parsed content of
    every attached file, prefixes web-search output when the text contains
    "search", "docs" or "latest", and sends it with the system prompt and the
    entire history to the chat-completions client.

    Args:
        message: Mapping read via ``.get`` for "text" (default "") and
            "files" (default []). A file is either a dict holding a "path"
            key or a value given directly to ``parse_file``.
        history: Iterable of mappings exposing "role" and "content".

    Yields:
        The cumulative reply after each non-empty token, or a single
        "Architecture Error: ..." string if any exception is raised while
        requesting or streaming.
    """
    user_text = message.get("text", "")
    uploaded = message.get("files", [])

    context_from_files = ""
    for entry in uploaded:
        if isinstance(entry, dict):
            source = entry["path"]
        else:
            source = entry
        context_from_files += parse_file(source)

    # NOTE: a draft of context management (history limited to the last 4
    # entries, file context cut at 10000 characters) used to sit here inside a
    # bare string literal and was never executed; this version applies no
    # such limits.

    text_lower = user_text.lower()
    research_requested = False
    for keyword in ("search", "docs", "latest"):
        if keyword in text_lower:
            research_requested = True
            break

    if not research_requested:
        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"
    else:
        research_context = web_search(user_text)
        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"

    # Conversation sent to the model: system prompt, every history entry
    # reduced to its role/content pair, then the new user prompt.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(
        {"role": turn["role"], "content": turn["content"]} for turn in history
    )
    messages.append({"role": "user", "content": prompt})

    response_text = ""
    try:
        request_options = {
            "model": "llama-3.1-8b-instant",  # Or "llama-3.1-70b-versatile"
            "messages": messages,
            "max_tokens": 2048,
            "stream": True,
            "temperature": 0.2,
        }
        for chunk in client.chat.completions.create(**request_options):
            # Ignore chunks that have no choices to read a delta from.
            has_choices = hasattr(chunk, "choices") and len(chunk.choices) > 0
            if not has_choices:
                continue
            token = chunk.choices[0].delta.content
            if not token:
                continue
            response_text += token
            yield response_text
    except Exception as e:
        yield f"Architecture Error: {str(e)}"