Spaces:
Running
Running
Commit ·
c505932
1
Parent(s): b83d944
updated core logic local
Browse files- core_logic_local.py +53 -2
core_logic_local.py
CHANGED
|
@@ -7,20 +7,71 @@ Max Tokens: Increased for local version since there is neither the cost is incur
|
|
| 7 |
. perform thorough code review,
|
| 8 |
. write deeper code analysis,
|
| 9 |
. produce comprehensive solutions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
"""
|
| 11 |
|
| 12 |
from openai import OpenAI
|
| 13 |
from tools import web_search, parse_file
|
| 14 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Ollama serves an OpenAI-compatible API locally at port 11434
|
| 17 |
client = OpenAI(
|
| 18 |
-
base_url=
|
| 19 |
api_key='ollama', # Required but ignored by Ollama
|
| 20 |
)
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# Use a local model served by Ollama. Make sure the server is running (`ollama serve`) and the model below has been pulled (`ollama pull <model>`).
|
| 23 |
-
model = "gemma4:latest"
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
SYSTEM_PROMPT = """
|
| 26 |
You are the 'Silicon Architect' — a full-stack, master-stroke creative genius in AI Engineering and Technical Architecture.
|
|
|
|
| 7 |
. perform thorough code review,
|
| 8 |
. write deeper code analysis,
|
| 9 |
. produce comprehensive solutions
|
| 10 |
+
|
| 11 |
+
# /v1 Necessity: The "/v1" suffix is essential in the base_url for the OpenAI library to correctly route requests to Ollama's API, even though Chrome shows the "Ollama is running" message at http://127.0.0.1:11434, i.e., without "/v1".
|
| 12 |
+
|
| 13 |
+
"First Principles" breakdown of why this is necessary:
|
| 14 |
+
|
| 15 |
+
1. The Browser vs. The API
|
| 16 |
+
When we visit 127.0.0.1:11434 in Chrome, we hit the Base URL, and Ollama sends back that simple text message just to confirm the service is alive.
|
| 17 |
+
|
| 18 |
+
However, Python code doesn't just check if Ollama is alive, it tries to have a conversation; and for that, it needs to talk to a specific Endpoint (a specific door in the building).
|
| 19 |
+
|
| 20 |
+
2. OpenAI Compatibility (The Industry Standard)
|
| 21 |
+
Ollama was designed to be a "drop-in replacement" for OpenAI. Almost every AI library (like the openai Python library) expects a standard URL structure called the OpenAI Chat Completions API.
|
| 22 |
+
|
| 23 |
+
The standard structure looks like this:
|
| 24 |
+
|
| 25 |
+
Base URL: http://localhost:11434
|
| 26 |
+
|
| 27 |
+
Version Prefix: /v1
|
| 28 |
+
|
| 29 |
+
Action: /chat/completions
|
| 30 |
+
|
| 31 |
+
When we set base_url='http://localhost:11434/v1', the OpenAI library automatically attaches /chat/completions to the end of it.
|
| 32 |
+
|
| 33 |
+
3. What happens if "/v1" is removed?
|
| 34 |
+
The library will try to send the data to http://localhost:11434/chat/completions, but because that URL is missing the "/v1" prefix, Ollama’s "OpenAI Compatibility" layer won't recognize the request, and either a "404 Not Found" or a "405 Method Not Allowed" may be encountered.
|
| 35 |
+
|
| 36 |
+
Summary Checklist:
|
| 37 |
+
In Chrome: Use 127.0.0.1:11434 - to see if it's on.
|
| 38 |
+
In Python Code: Use 127.0.0.1:11434/v1 - to actually send prompts.
|
| 39 |
+
|
| 40 |
"""
|
| 41 |
|
| 42 |
from openai import OpenAI
|
| 43 |
from tools import web_search, parse_file
|
| 44 |
import os
|
| 45 |
+
import socket
|
| 46 |
+
|
| 47 |
+
def get_base_url():
    """Return the OpenAI-compatible base URL of the Ollama server to talk to.

    Resolution order:
      1. The ``OLLAMA_BASE_URL`` environment variable, when set to a
         non-blank value (returned stripped of surrounding whitespace).
      2. ``http://172.17.0.1:11434/v1`` when ``/proc/version`` contains
         "microsoft", which is taken to mean the script runs inside WSL
         and must reach Ollama on the Windows host.
      3. ``http://127.0.0.1:11434/v1`` otherwise.

    Returns:
        str: The base URL, including the ``/v1`` prefix.
    """
    # Explicit override first: the WSL address below is hard-coded.
    # NOTE(review): the host address seen from WSL can differ per machine;
    # confirm 172.17.0.1 for your setup or set OLLAMA_BASE_URL instead.
    override = os.environ.get("OLLAMA_BASE_URL", "").strip()
    if override:
        return override

    # EAFP: a missing or unreadable /proc/version means "not WSL"; it should
    # not crash module import.
    try:
        with open('/proc/version', 'r') as f:
            kernel_banner = f.read().lower()
    except OSError:
        kernel_banner = ""

    if 'microsoft' in kernel_banner:
        # Running the script from inside the Ubuntu (WSL) terminal:
        # point to the Windows host.
        return "http://172.17.0.1:11434/v1"

    # Otherwise assume the script runs on the machine that hosts Ollama
    # (e.g. from Windows PowerShell/CMD) and point to localhost.
    return "http://127.0.0.1:11434/v1"
|
| 56 |
|
| 57 |
# Ollama serves an OpenAI-compatible API locally at port 11434
|
| 58 |
client = OpenAI(
|
| 59 |
+
base_url=get_base_url(),
|
| 60 |
api_key='ollama', # Required but ignored by Ollama
|
| 61 |
)
|
| 62 |
|
| 63 |
+
"""
|
| 64 |
+
client = OpenAI(
|
| 65 |
+
base_url='http://localhost:11434/v1',
|
| 66 |
+
api_key="ollama"
|
| 67 |
+
)
|
| 68 |
+
"""
|
| 69 |
+
|
| 70 |
# Use a local model served by Ollama. Make sure the server is running (`ollama serve`) and the model below has been pulled (`ollama pull <model>`).
|
| 71 |
+
#model = "gemma4:latest"
|
| 72 |
+
model = "llama3:latest" # better than llama3.2:latest and phi3:latest
|
| 73 |
+
#model = "llama3.2:latest"
|
| 74 |
+
#model = "phi3:latest"
|
| 75 |
|
| 76 |
SYSTEM_PROMPT = """
|
| 77 |
You are the 'Silicon Architect' — a full-stack, master-stroke creative genius in AI Engineering and Technical Architecture.
|