prashantmatlani committed
Commit a5b3aff · 1 Parent(s): 790aee3

updated prompt, model

Files changed (2):
  1. core_logic.py +1 -1
  2. core_logic_local.py +81 -0
core_logic.py CHANGED
@@ -51,7 +51,7 @@ def chat_function(message, history):
         model=model,
         messages=messages,
         stream=True,
-        temperature=0.2,
+        temperature=0.0,
         max_tokens=1024  # Limit response size to prevent mid-stream cuts
     )
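The only functional change here is temperature: 0.2 → 0.0. At temperature 0 the backend decodes (near-)greedily, so identical prompts tend to produce identical answers, which suits a code assistant better than sampled variety. A minimal sketch of the effect (the prompt and the ask helper are made up for illustration; client and model are the ones defined in this file):

def ask(prompt):
    # Hypothetical helper: one non-streaming call at temperature 0.0
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.0,
        max_tokens=64,
    )
    return resp.choices[0].message.content

# Greedy decoding is not strictly guaranteed to be deterministic on every
# backend, but two identical requests will usually match at temperature 0.0.
print(ask("Name one Python web framework.") == ask("Name one Python web framework."))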
 
core_logic_local.py ADDED
@@ -0,0 +1,81 @@
+# ./core_logic_local.py
+
+from openai import OpenAI
+
+# parse_file and web_search are assumed to be the helpers already defined in
+# core_logic.py; without this import the calls below would raise NameError.
+from core_logic import parse_file, web_search
+
+# Ollama serves an OpenAI-compatible API locally on port 11434
+client = OpenAI(
+    base_url='http://localhost:11434/v1',
+    api_key='ollama',  # Required by the client but ignored by Ollama
+)
+
+# Use the local model served by Ollama. Make sure the server is running
+# (`ollama serve`) and the model has been pulled (`ollama pull gemma4`).
+model = "gemma4:latest"
+
+# Compressed for token efficiency
+SYSTEM_PROMPT = (
+    "You're a Full-stack AI Engineering Genius. "
+    "Expert in Python (latest production version), Agentic Loops, FastAPI, NodeJS, HTML, and CSS. "
+    "Provide production-ready code with needed comments. Analyze files when provided. Be concise."
+)
+
+def chat_function(message, history):
+    user_text = message.get("text", "")
+    files = message.get("files", [])
+
+    # 1. Process files with a character limit
+    context_from_files = ""
+    for f in files:
+        path = f["path"] if isinstance(f, dict) else f
+        file_content = parse_file(path)
+        context_from_files += file_content
+
+    # TRUNCATE FILE CONTEXT: max ~3000 tokens (approx. 12,000 chars)
+    if len(context_from_files) > 12000:
+        context_from_files = context_from_files[:12000] + "\n...[File Content Truncated for TPM Limits]..."
+
+    # 2. Research trigger
+    if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
+        research_context = web_search(user_text)
+        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
+    else:
+        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"
+
+    # 3. Build messages with history slicing
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
+    # ONLY KEEP LAST 3 TURNS: this is the 'Master Stroke' for staying under 6k TPM
+    for turn in history[-3:]:
+        messages.append({"role": turn["role"], "content": turn["content"]})
+
+    messages.append({"role": "user", "content": prompt})
+
+    try:
+        completion = client.chat.completions.create(
+            model=model,
+            messages=messages,
+            stream=True,
+            temperature=0.2,
+            max_tokens=1024  # Limit response size to prevent mid-stream cuts
+        )
+
+        response_text = ""
+        for chunk in completion:
+            if chunk.choices and chunk.choices[0].delta.content:
+                token = chunk.choices[0].delta.content
+                response_text += token
+                yield response_text
+    except Exception as e:
+        yield f"Error: {str(e)}"
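For a quick local check of the new module, the generator can be driven directly; a minimal sketch, assuming Ollama is serving on port 11434 and that parse_file/web_search resolve (the example message mirrors the Gradio-style multimodal dict format used above):

from core_logic_local import chat_function

# chat_function yields the accumulated response after each streamed token,
# so the final yielded value is the complete answer.
message = {"text": "Write a FastAPI hello-world endpoint.", "files": []}
final = ""
for partial in chat_function(message, history=[]):
    final = partial
print(final)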