Spaces:

prashantmatlani
/

coderg

Running

App Files Files Community

prashantmatlani committed 17 days ago

Commit

3a187d4

1 Parent(s): 94e5bdb

token safe core_logic

Browse files

Files changed (2) hide show

core_logic.py +32 -44
core_logic_earlier.py +91 -0

core_logic.py CHANGED Viewed

@@ -6,75 +6,63 @@ The Inference Engine - Where the "Technical Genius" persona lives. It uses the h
 """
 import os
-from huggingface_hub import InferenceClient
-from tools import web_search, parse_file
 from groq import Groq
 client = Groq(api_key=os.getenv("GROQ_API_KEY"))
-# Recommended: Qwen2.5-Coder-32B or Llama-3.1-70B-Instruct
-#client = InferenceClient("deepseek-ai/DeepSeek-V4-Pro", token=os.getenv("HF_TOKEN"))
-#client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct", token=os.getenv("HF_TOKEN"))
-#client = InferenceClient("Qwen/Qwen2.5-Coder-7B-Instruct", token=os.getenv("HF_TOKEN"))
-#client = InferenceClient("llama-3.1-8b-instant", token=os.getenv("HF_TOKEN")) "llama-3.1-70b-versatile" -> GROQ API
-#client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=os.getenv("HF_TOKEN")) # Or "Qwen/Qwen2.5-72B-Instruct"
-SYSTEM_PROMPT = """
-You are the 'Silicon Architect'—a master-stroke creative genius in AI Engineering and Technical Architecture.
-Your goal is to provide production-grade, highly optimized solutions for web and mobile AI applications.
-Expertise: Python 3.12, Agentic Loops, FastAPI, and Scalable Architecture.
-Provide production-ready code and rigorous technical research.
-CORE DIRECTIVES:
-1. ARCHITECTURAL RIGOR: Always consider scalability, async patterns, and state management.
-2. AGENTIC EXPERTISE: You understand recurrent-depth simulations, tool-calling, and autonomous loops.
-3. CODE QUALITY: Write clean, PEP 8 compliant, and secure Python/JS code.
-4. INNOVATION: Suggest the latest libraries and frameworks (FastAPI, LangGraph, Pydantic AI; but not limited to these).
-5. RESEARCH: If the user asks about new tech, use your Web Search capability to provide factual, up-to-date documentation.
-PERSONALITY:
-1. FRANK/POLITE: Disagree with the user, if needed; never resort to sycophancy, and suggest better alternatives
-2. HUMBLE: Apologize when mistaken
-3. FIRST PRINCIPLES: Base your responses and reasoning in Richard Feynman’s first principles thinking. Break down complex problems into fundamental truths and reason up from there
-When a user provides files, analyze the code structure and logic before proposing changes.
-"""
 def chat_function(message, history):
     user_text = message.get("text", "")
     files = message.get("files", [])
     context_from_files = ""
     for f in files:
         path = f["path"] if isinstance(f, dict) else f
-        context_from_files += parse_file(path)
     if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
         research_context = web_search(user_text)
         prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
     else:
         prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-    # Ensure history is in the correct format for the API
-    for turn in history:
         messages.append({"role": turn["role"], "content": turn["content"]})
     messages.append({"role": "user", "content": prompt})
-    response_text = ""
     try:
-        #for chunk in client.chat_completion(messages, max_tokens=2048, stream=True, temperature=0.2):
-        # --- Uncomment below for GROQ
-        for chunk in client.chat.completions.create(model="llama-3.1-8b-instant", messages=messages, max_tokens=2048, stream=True, temperature=0.2): # Or model="llama-3.1-70b-versatile"
-            # FIX: Check if choices exists and is not empty
-            if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
                 token = chunk.choices[0].delta.content
-                if token:
-                    response_text += token
-                    yield response_text
     except Exception as e:
-        yield f"Architecture Error: {str(e)}"

 """
 import os
 from groq import Groq
+from tools import web_search, parse_file
 client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+# Compressed for token efficiency
+SYSTEM_PROMPT = (
+    "You are 'Silicon Architect', an AI Engineering Genius. "
+    "Expert in Python (latest production version), Agentic Loops, and FastAPI, NodeJS, HTML, CSS. "
+    "Provide production-ready code. Analyze files first. Be concise."
+)
 def chat_function(message, history):
     user_text = message.get("text", "")
     files = message.get("files", [])
+    # 1. Process Files with character limits
     context_from_files = ""
     for f in files:
         path = f["path"] if isinstance(f, dict) else f
+        file_content = parse_file(path)
+        context_from_files += file_content
+    # TRUNCATE FILE CONTEXT: Max ~3000 tokens (approx 12,000 chars)
+    if len(context_from_files) > 12000:
+        context_from_files = context_from_files[:12000] + "\n...[File Content Truncated for TPM Limits]..."
+    # 2. Research Trigger
     if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
         research_context = web_search(user_text)
         prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
     else:
         prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"
+    # 3. Build Messages with History Slicing
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    # Keep only the last 3 history messages (individual role/content entries, not 3 full user/assistant exchanges) to stay under the 6k TPM limit
+    for turn in history[-3:]:
         messages.append({"role": turn["role"], "content": turn["content"]})
     messages.append({"role": "user", "content": prompt})
     try:
+        completion = client.chat.completions.create(
+            model="llama-3.1-8b-instant",
+            messages=messages,
+            stream=True,
+            temperature=0.2,
+            max_tokens=1024 # Limit response size to prevent mid-stream cuts
+        )
+        response_text = ""
+        for chunk in completion:
+            if chunk.choices and chunk.choices[0].delta.content:
                 token = chunk.choices[0].delta.content
+                response_text += token
+                yield response_text
     except Exception as e:
+        yield f"TPM/Rate Limit Error: {str(e)}"

core_logic_earlier.py ADDED Viewed

	@@ -0,0 +1,91 @@

+# ./core_logic.py
+"""
+The Inference Engine - Where the "Technical Genius" persona lives. It uses the huggingface_hub InferenceClient to run the model without local CPU strain
+"""
+import os
+from huggingface_hub import InferenceClient
+from tools import web_search, parse_file
+from groq import Groq
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+# Recommended: Qwen2.5-Coder-32B or Llama-3.1-70B-Instruct
+#client = InferenceClient("deepseek-ai/DeepSeek-V4-Pro", token=os.getenv("HF_TOKEN"))
+#client = InferenceClient("Qwen/Qwen2.5-Coder-32B-Instruct", token=os.getenv("HF_TOKEN"))
+#client = InferenceClient("Qwen/Qwen2.5-Coder-7B-Instruct", token=os.getenv("HF_TOKEN"))
+#client = InferenceClient("llama-3.1-8b-instant", token=os.getenv("HF_TOKEN")) "llama-3.1-70b-versatile" -> GROQ API
+#client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=os.getenv("HF_TOKEN")) # Or "Qwen/Qwen2.5-72B-Instruct"
+SYSTEM_PROMPT = """
+You are the 'Silicon Architect'—a master-stroke creative genius in AI Engineering and Technical Architecture.
+Your goal is to provide production-grade, highly optimized solutions for web and mobile AI applications.
+Expertise: Python 3.12, Agentic Loops, FastAPI, and Scalable Architecture.
+Provide production-ready code and rigorous technical research.
+CORE DIRECTIVES:
+1. ARCHITECTURAL RIGOR: Always consider scalability, async patterns, and state management.
+2. AGENTIC EXPERTISE: You understand recurrent-depth simulations, tool-calling, and autonomous loops.
+3. CODE QUALITY: Write clean, PEP 8 compliant, and secure Python/JS code.
+4. INNOVATION: Suggest the latest libraries and frameworks (FastAPI, LangGraph, Pydantic AI; but not limited to these).
+5. RESEARCH: If the user asks about new tech, use your Web Search capability to provide factual, up-to-date documentation.
+PERSONALITY:
+1. FRANK/POLITE: Disagree with the user, if needed; never resort to sycophancy, and suggest better alternatives
+2. HUMBLE: Apologize when mistaken
+3. FIRST PRINCIPLES: Base your responses and reasoning in Richard Feynman’s first principles thinking. Break down complex problems into fundamental truths and reason up from there
+When a user provides files, analyze the code structure and logic before proposing changes.
+"""
+def chat_function(message, history):
+    user_text = message.get("text", "")
+    files = message.get("files", [])
+    context_from_files = ""
+    for f in files:
+        path = f["path"] if isinstance(f, dict) else f
+        context_from_files += parse_file(path)
+    """
+    # MASTER STROKE: Context Management
+    # Limit history to the last 4 turns to save tokens
+    recent_history = history[-4:] if len(history) > 4 else history
+    # LIMIT file context: If context is too long, truncate it
+    MAX_FILE_CHARS = 10000 # Roughly 2.5k tokens
+    if len(context_from_files) > MAX_FILE_CHARS:
+        context_from_files = context_from_files[:MAX_FILE_CHARS] + "\n...[Content Truncated for Limit]..."
+    """
+    if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
+        research_context = web_search(user_text)
+        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
+    else:
+        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    # Ensure history is in the correct format for the API
+    for turn in history:
+        messages.append({"role": turn["role"], "content": turn["content"]})
+    messages.append({"role": "user", "content": prompt})
+    response_text = ""
+    try:
+        #for chunk in client.chat_completion(messages, max_tokens=2048, stream=True, temperature=0.2):
+        # --- Uncomment below for GROQ
+        for chunk in client.chat.completions.create(model="llama-3.1-8b-instant", messages=messages, max_tokens=2048, stream=True, temperature=0.2): # Or model="llama-3.1-70b-versatile"
+            # FIX: Check if choices exists and is not empty
+            if hasattr(chunk, 'choices') and len(chunk.choices) > 0:
+                token = chunk.choices[0].delta.content
+                if token:
+                    response_text += token
+                    yield response_text
+    except Exception as e:
+        yield f"Architecture Error: {str(e)}"