atacama / app.py
AtacamaLLM's picture
Upload 4 files
973ff16 verified
from flask import Flask, request, jsonify, render_template_string
from flask_cors import CORS
import os
import time

# Pin the OpenMP / MKL thread pools to a single thread. These variables are
# assigned BEFORE torch is imported: the native runtimes typically read them
# when they are first initialised, so setting them after the import may be
# too late to have any effect.
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'

import torch  # noqa: E402  (must follow the environment setup above)
import torch.nn as nn  # noqa: E402

# Force PyTorch to use single thread (fixes slow inference on throttled CPUs)
torch.set_num_threads(1)
torch.set_num_interop_threads(1)
# Model classes (defined inline in this file rather than imported)
class CharTokenizer:
    """Character-level tokenizer over a fixed Latin/Spanish alphabet.

    Every known character is assigned a positive integer id (starting at 1).
    Id 0 is shared by padding and by any character outside the alphabet.

    Attributes:
        char_to_idx: mapping from character to its integer id.
        idx_to_char: inverse mapping from integer id to character.
        vocab_size: number of known characters plus one (for id 0).
    """

    def __init__(self):
        alphabet = (
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "
            "0123456789.,!?¿áéíóúñÁÉÍÓÚÑ"
        )
        self.char_to_idx = {}
        self.idx_to_char = {}
        # Ids start at 1 so that 0 stays free for padding / unknown characters.
        for position, symbol in enumerate(alphabet, start=1):
            self.char_to_idx[symbol] = position
            self.idx_to_char[position] = symbol
        self.vocab_size = len(self.char_to_idx) + 1

    def encode(self, text, max_len=100):
        """Convert *text* into a 1-D ``torch.long`` tensor of length *max_len*.

        The text is truncated to *max_len* characters, unknown characters are
        mapped to 0, and the result is right-padded with 0.
        """
        clipped = text[:max_len]
        ids = [self.char_to_idx.get(symbol, 0) for symbol in clipped]
        padding = max_len - len(ids)
        ids.extend([0] * padding)
        return torch.tensor(ids, dtype=torch.long)
class AtacamaWeatherOracle(nn.Module):
    """Tiny character-level classifier: embedding -> single-layer LSTM -> linear.

    Args:
        vocab_size: number of rows in the embedding table (id 0 is padding).
        embed_dim: size of each character embedding.
        hidden_dim: size of the LSTM hidden state.
    """

    def __init__(self, vocab_size=100, embed_dim=16, hidden_dim=32):
        super().__init__()
        # Attribute names are part of the checkpoint's state_dict keys.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
            padding_idx=0,
        )
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.classifier = nn.Linear(in_features=hidden_dim, out_features=2)

    def forward(self, x):
        """Return two-class logits computed from the LSTM's final hidden state."""
        char_vectors = self.embedding(x)
        lstm_state = self.lstm(char_vectors)[1]
        final_hidden = lstm_state[0]
        # Drop the leading layer axis of the hidden state (one LSTM layer).
        return self.classifier(final_hidden.squeeze(0))
# Initialize Flask app
app = Flask(__name__)
# Enable cross-origin requests using flask_cors' default settings
CORS(app)
# Load the trained model once, at import time, so every request reuses it
print("Loading Atacama Weather Oracle...")
load_start = time.time()
tokenizer = CharTokenizer()
# The embedding table is sized from the tokenizer's vocabulary
model = AtacamaWeatherOracle(vocab_size=tokenizer.vocab_size)
# The checkpoint path is relative, so it resolves against the process's
# current working directory.
# NOTE(review): torch.load unpickles the file - only load checkpoints you
# trust; consider weights_only=True if the checkpoint holds nothing but
# tensors (confirm against how the checkpoint was saved).
checkpoint = torch.load('atacama_weather_oracle.pth', map_location='cpu')
model.load_state_dict(checkpoint['model_state_dict'])
# Put the module in evaluation mode for inference
model.eval()
load_time = time.time() - load_start
print(f"✅ Oracle loaded and ready! (took {load_time:.3f}s)")
# HTML template for the single-page web interface, rendered by the `/` route
# through render_template_string. The embedded script POSTs the question as
# JSON to `/ask` and displays the answer, confidence, class probabilities and
# timing fields from the JSON response. The `${...}` placeholders are
# JavaScript template literals evaluated in the browser.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Is It Raining in Atacama?</title>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Courier New', monospace;
max-width: 700px;
margin: 0 auto;
padding: 40px 20px;
background: #fafafa;
color: #1a1a1a;
line-height: 1.6;
}
.container {
background: white;
padding: 40px;
border: 1px solid #e0e0e0;
}
h1 {
font-size: 1.5em;
font-weight: normal;
margin-bottom: 8px;
letter-spacing: -0.5px;
}
.subtitle {
font-size: 0.85em;
color: #666;
margin-bottom: 30px;
font-family: -apple-system, sans-serif;
}
.stats {
display: inline-block;
background: #f5f5f5;
padding: 2px 8px;
margin: 0 4px;
font-size: 0.8em;
border-radius: 2px;
}
input[type="text"] {
width: 100%;
padding: 12px;
font-size: 15px;
font-family: -apple-system, sans-serif;
border: 1px solid #d0d0d0;
margin-bottom: 12px;
background: #fafafa;
}
input[type="text"]:focus {
outline: none;
border-color: #1a1a1a;
background: white;
}
button {
width: 100%;
padding: 12px;
font-size: 15px;
font-family: 'Courier New', monospace;
background: #1a1a1a;
color: white;
border: none;
cursor: pointer;
transition: background 0.2s;
}
button:hover {
background: #333;
}
#result {
margin-top: 30px;
padding: 20px;
background: #f9f9f9;
border-left: 3px solid #1a1a1a;
display: none;
font-family: -apple-system, sans-serif;
}
.answer {
font-size: 2em;
font-weight: 300;
margin-bottom: 8px;
font-family: 'Courier New', monospace;
}
.confidence {
font-size: 0.9em;
color: #666;
}
.footer {
margin-top: 40px;
padding-top: 20px;
border-top: 1px solid #e0e0e0;
font-size: 0.8em;
color: #999;
font-family: -apple-system, sans-serif;
}
.emoji {
font-size: 2em;
margin-bottom: 10px;
}
.timing {
margin-top: 10px;
font-size: 0.75em;
color: #aaa;
font-family: 'Courier New', monospace;
}
</style>
</head>
<body>
<div class="container">
<h1>atacama</h1>
<p class="subtitle">
An ultra-small language model
<span class="stats">7,762 parameters</span>
<span class="stats">30KB</span>
<span class="stats">99.9% certain</span>
</p>
<input type="text" id="question" placeholder="is it raining in atacama?"
value="is it raining in atacama?">
<button onclick="askOracle()">ask</button>
<div id="result"></div>
<div class="footer">
trained on 50+ years of atacama desert weather data<br>
last recorded rainfall: march 2015
</div>
</div>
<script>
async function askOracle() {
const question = document.getElementById('question').value;
const resultDiv = document.getElementById('result');
resultDiv.style.display = 'block';
resultDiv.innerHTML = '<p>Consulting the oracle...</p>';
const startTime = performance.now();
try {
const response = await fetch('/ask', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({question: question})
});
const endTime = performance.now();
const totalTime = ((endTime - startTime) / 1000).toFixed(2);
const data = await response.json();
const emoji = data.prob_no_rain > 0.999 ? '☀️' : '🌤️';
resultDiv.innerHTML = `
<div class="emoji">${emoji}</div>
<div class="answer">${data.answer}</div>
<div class="confidence">${data.confidence}</div>
<div class="confidence" style="margin-top: 10px; font-size: 0.9em;">
No rain: ${(data.prob_no_rain * 100).toFixed(2)}% |
Rain: ${(data.prob_rain * 100).toFixed(2)}%
</div>
<div class="timing">
⏱️ total: ${totalTime}s | server inference: ${data.inference_ms}ms
</div>
`;
} catch (error) {
resultDiv.innerHTML = '<p>Error: Could not reach the oracle</p>';
}
}
// Allow Enter key to submit
document.getElementById('question').addEventListener('keypress', function(e) {
if (e.key === 'Enter') askOracle();
});
</script>
</body>
</html>
"""
@app.route('/')
def home():
    """Serve the single-page web interface."""
    page = render_template_string(HTML_TEMPLATE)
    return page
@app.route('/ask', methods=['POST'])
def ask():
    """Classify a question and return the oracle's verdict as JSON.

    Request body: a JSON object with an optional string field ``question``
    (defaults to the empty string).

    Returns:
        A JSON response with the keys ``answer``, ``confidence``,
        ``prob_no_rain``, ``prob_rain``, ``inference_ms`` (total handler time
        in milliseconds, formatted as a string) and ``debug`` (per-stage
        timings). If the body is not a JSON object, or ``question`` is not a
        string, a 400 response with an ``error`` message is returned instead
        of failing with an unhandled exception.
    """
    # perf_counter is monotonic, so measured durations cannot be skewed by
    # wall-clock adjustments.
    request_start = time.perf_counter()

    # silent=True yields None for a missing/malformed JSON body instead of raising.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400
    question = payload.get('question', '')
    if not isinstance(question, str):
        return jsonify({'error': "'question' must be a string."}), 400

    # Ask the oracle with granular timing
    t0 = time.perf_counter()
    tokens = tokenizer.encode(question).unsqueeze(0)  # add a batch dimension
    t1 = time.perf_counter()
    with torch.no_grad():
        logits = model(tokens)
    t2 = time.perf_counter()
    probs = torch.softmax(logits, dim=1)[0]
    t3 = time.perf_counter()
    # Class 0 is read as "no rain", class 1 as "rain".
    prob_no_rain = probs[0].item()
    prob_rain = probs[1].item()
    t4 = time.perf_counter()

    if prob_no_rain > 0.999:
        answer = "No."
        confidence = "Absolute certainty"
    elif prob_no_rain > 0.99:
        answer = "No. (But I admire your optimism)"
        confidence = "Very high confidence"
    elif prob_no_rain > 0.9:
        answer = "Almost certainly not."
        confidence = "High confidence"
    else:
        answer = "Historically unprecedented... but no."
        confidence = "Moderate confidence"

    total_ms = (time.perf_counter() - request_start) * 1000
    tokenize_ms = (t1 - t0) * 1000
    model_ms = (t2 - t1) * 1000
    softmax_ms = (t3 - t2) * 1000
    extract_ms = (t4 - t3) * 1000

    # Log granular timing to server console
    print(f"TIMING: tokenize={tokenize_ms:.1f}ms, model={model_ms:.1f}ms, softmax={softmax_ms:.1f}ms, extract={extract_ms:.1f}ms, total={total_ms:.1f}ms")
    return jsonify({
        'answer': answer,
        'confidence': confidence,
        'prob_no_rain': prob_no_rain,
        'prob_rain': prob_rain,
        'inference_ms': f"{total_ms:.1f}",
        'debug': f"tok={tokenize_ms:.0f}ms model={model_ms:.0f}ms soft={softmax_ms:.0f}ms",
    })
@app.route('/health')
def health():
    """Health check endpoint - also useful for keeping the container warm.

    Always responds with a fixed JSON payload.
    """
    status_payload = {'status': 'ok', 'model': 'loaded'}
    return jsonify(status_payload)
if __name__ == '__main__':
    # Script entry point: listen on all interfaces, on the port given by the
    # PORT environment variable (5000 when unset). `os` is already imported at
    # module level, so no local import is needed here.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 5000)))