prashantmatlani committed
Commit a5b3aff · 1 Parent(s): 790aee3

updated prompt, model

Files changed (2):
  1. core_logic.py +1 -1
  2. core_logic_local.py +81 -0
core_logic.py CHANGED
@@ -51,7 +51,7 @@ def chat_function(message, history):
         model=model,
         messages=messages,
         stream=True,
-        temperature=0.2,
+        temperature=0.0,
         max_tokens=1024  # Limit response size to prevent mid-stream cuts
     )
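The only functional change here is temperature: 0.2 → 0.0. At temperature 0 the backend decodes (near-)greedily, so identical prompts tend to produce identical answers, which suits a code assistant better than sampled variety. A minimal sketch of the effect (the prompt and the ask helper are made up for illustration; client and model are the ones defined in this file):

def ask(prompt):
    # Hypothetical helper: one non-streaming call at temperature 0.0
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,
        max_tokens=64,
    )
    return resp.choices[0].message.content

# Greedy decoding is not strictly guaranteed to be deterministic on every
# backend, but two identical requests will usually match at temperature 0.0.
print(ask("Name one Python web framework.") == ask("Name one Python web framework."))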
 
core_logic_local.py ADDED
@@ -0,0 +1,81 @@
+# ./core_logic_local.py
+
+from openai import OpenAI
+
+# parse_file and web_search are assumed to be the helpers already defined in
+# core_logic.py; without this import the calls below would raise NameError.
+from core_logic import parse_file, web_search
+
+# Ollama serves an OpenAI-compatible API locally on port 11434
+client = OpenAI(
+    base_url='http://localhost:11434/v1',
+    api_key='ollama',  # Required by the client but ignored by Ollama
+)
+
+# Use the local model served by Ollama. Make sure the server is running
+# (`ollama serve`) and the model has been pulled (`ollama pull gemma4`).
+model = "gemma4:latest"
+
+# Compressed for token efficiency
+SYSTEM_PROMPT = (
+    "You're a Full-stack AI Engineering Genius. "
+    "Expert in Python (latest production version), Agentic Loops, FastAPI, NodeJS, HTML, and CSS. "
+    "Provide production-ready code with needed comments. Analyze files when provided. Be concise."
+)
+
+def chat_function(message, history):
+    user_text = message.get("text", "")
+    files = message.get("files", [])
+
+    # 1. Process files with a character limit
+    context_from_files = ""
+    for f in files:
+        path = f["path"] if isinstance(f, dict) else f
+        file_content = parse_file(path)
+        context_from_files += file_content
+
+    # TRUNCATE FILE CONTEXT: max ~3000 tokens (approx. 12,000 chars)
+    if len(context_from_files) > 12000:
+        context_from_files = context_from_files[:12000] + "\n...[File Content Truncated for TPM Limits]..."
+
+    # 2. Research trigger
+    if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
+        research_context = web_search(user_text)
+        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
+    else:
+        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"
+
+    # 3. Build messages with history slicing
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
+    # ONLY KEEP LAST 3 TURNS: this is the 'Master Stroke' for staying under 6k TPM
+    for turn in history[-3:]:
+        messages.append({"role": turn["role"], "content": turn["content"]})
+
+    messages.append({"role": "user", "content": prompt})
+
+    try:
+        completion = client.chat.completions.create(
+            model=model,
+            messages=messages,
+            stream=True,
+            temperature=0.2,
+            max_tokens=1024  # Limit response size to prevent mid-stream cuts
+        )
+
+        response_text = ""
+        for chunk in completion:
+            if chunk.choices and chunk.choices[0].delta.content:
+                token = chunk.choices[0].delta.content
+                response_text += token
+                yield response_text
+    except Exception as e:
+        yield f"Error: {str(e)}"
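For a quick local check of the new module, the generator can be driven directly; a minimal sketch, assuming Ollama is serving on port 11434 and that parse_file/web_search resolve (the example message mirrors the Gradio-style multimodal dict format used above):

from core_logic_local import chat_function

# chat_function yields the accumulated response after each streamed token,
# so the final yielded value is the complete answer.
message = {"text": "Write a FastAPI hello-world endpoint.", "files": []}
final = ""
for partial in chat_function(message, history=[]):
    final = partial
print(final)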