atacama / app.py

Upload 4 files

973ff16 verified 9 days ago

9.32 kB

	from flask import Flask, request, jsonify, render_template_string
	from flask_cors import CORS
	import torch
	import torch.nn as nn
	import time
	import os

	# Force PyTorch to use a single thread (fixes slow inference on throttled CPUs).
	# set_num_threads limits intra-op parallelism; set_num_interop_threads limits
	# inter-op parallelism.
	torch.set_num_threads(1)
	torch.set_num_interop_threads(1)
	# Also cap the OpenMP / MKL thread pools through their environment variables.
	# NOTE(review): these are assigned after `import torch`, so native libraries that
	# were already initialised may not re-read them — confirm they take effect.
	os.environ['OMP_NUM_THREADS'] = '1'
	os.environ['MKL_NUM_THREADS'] = '1'

	# Model classes (defined inline in this file rather than imported)
	class CharTokenizer:
	    """Character-level tokenizer over a fixed alphabet.

	    Every character of the alphabet is assigned a positive integer id in
	    order of appearance, starting at 1. Id 0 is used both for characters
	    outside the alphabet and for right-padding.

	    Attributes:
	        char_to_idx: Mapping from character to its id (1-based).
	        idx_to_char: Inverse mapping from id to character.
	        vocab_size: Number of distinct ids, including the reserved id 0.
	    """

	    def __init__(self):
	        # Latin letters and space, then digits, punctuation and Spanish accented letters.
	        alphabet = (
	            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "
	            "0123456789.,!?¿áéíóúñÁÉÍÓÚÑ"
	        )
	        self.char_to_idx = {}
	        self.idx_to_char = {}
	        for position, symbol in enumerate(alphabet, start=1):
	            self.char_to_idx[symbol] = position
	            self.idx_to_char[position] = symbol
	        # One extra slot for the reserved id 0.
	        self.vocab_size = len(self.char_to_idx) + 1

	    def encode(self, text, max_len=100):
	        """Convert *text* into a 1-D ``torch.long`` tensor of character ids.

	        Args:
	            text: The string to encode; only its first ``max_len`` characters are used.
	            max_len: Length of the returned tensor.

	        Returns:
	            A tensor of ``max_len`` ids; unknown characters map to 0 and
	            shorter inputs are right-padded with 0.
	        """
	        lookup = self.char_to_idx.get
	        ids = [lookup(ch, 0) for ch in text[:max_len]]
	        missing = max_len - len(ids)
	        ids.extend([0] * missing)
	        return torch.tensor(ids, dtype=torch.long)

	class AtacamaWeatherOracle(nn.Module):
	    """Small sequence classifier: embedding -> single-layer LSTM -> linear head.

	    The linear head produces two logits per input sequence.

	    Args:
	        vocab_size: Number of rows in the embedding table (row 0 is the padding row).
	        embed_dim: Size of each character embedding.
	        hidden_dim: Size of the LSTM hidden state.
	    """

	    def __init__(self, vocab_size=100, embed_dim=16, hidden_dim=32):
	        super().__init__()
	        # Attribute names and creation order are kept stable: they determine the
	        # state_dict keys and the order in which parameters are initialised.
	        self.embedding = nn.Embedding(
	            num_embeddings=vocab_size,
	            embedding_dim=embed_dim,
	            padding_idx=0,
	        )
	        self.lstm = nn.LSTM(
	            input_size=embed_dim,
	            hidden_size=hidden_dim,
	            batch_first=True,
	        )
	        self.classifier = nn.Linear(in_features=hidden_dim, out_features=2)

	    def forward(self, x):
	        """Return the two class logits for each sequence in *x*.

	        Args:
	            x: Integer id tensor; with ``batch_first=True`` the LSTM reads it
	                as (batch, sequence) once embedded.

	        Returns:
	            The classifier output computed from the LSTM's final hidden state.
	        """
	        # Only the final hidden state is needed; per-step outputs and the cell state are discarded.
	        _, (final_hidden, _) = self.lstm(self.embedding(x))
	        # Drop the leading layer axis (size 1 for this single-layer LSTM).
	        return self.classifier(final_hidden.squeeze(0))

	# Initialize Flask app
	app = Flask(__name__)
	# CORS(app) with no options applies flask_cors's default cross-origin policy to every route.
	CORS(app)

	# Load the trained model once at import time; the request handlers below
	# reuse the module-level `tokenizer` and `model` objects.
	print("Loading Atacama Weather Oracle...")
	load_start = time.time()
	tokenizer = CharTokenizer()
	# Size the embedding table from the tokenizer's vocabulary; load_state_dict
	# below requires the parameter shapes to match the checkpoint.
	model = AtacamaWeatherOracle(vocab_size=tokenizer.vocab_size)

	# The checkpoint path is relative, so it is resolved against the process's
	# current working directory.
	# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
	checkpoint = torch.load('atacama_weather_oracle.pth', map_location='cpu')
	model.load_state_dict(checkpoint['model_state_dict'])
	# Switch the module to inference mode before serving requests.
	model.eval()
	load_time = time.time() - load_start
	print(f"✅ Oracle loaded and ready! (took {load_time:.3f}s)")

	# HTML template for the web interface.
	# Served by the `/` route through render_template_string; it contains no
	# Jinja expressions, so it is emitted as-is. The embedded script POSTs the
	# question as JSON to `/ask` and renders the answer, probabilities and timing.
	# The separators in the result markup are plain `|` characters: writing them
	# as `\|` is an invalid escape sequence in a non-raw Python string.
	HTML_TEMPLATE = """
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Is It Raining in Atacama?</title>
	<style>
	* {
	margin: 0;
	padding: 0;
	box-sizing: border-box;
	}
	body {
	font-family: 'Courier New', monospace;
	max-width: 700px;
	margin: 0 auto;
	padding: 40px 20px;
	background: #fafafa;
	color: #1a1a1a;
	line-height: 1.6;
	}
	.container {
	background: white;
	padding: 40px;
	border: 1px solid #e0e0e0;
	}
	h1 {
	font-size: 1.5em;
	font-weight: normal;
	margin-bottom: 8px;
	letter-spacing: -0.5px;
	}
	.subtitle {
	font-size: 0.85em;
	color: #666;
	margin-bottom: 30px;
	font-family: -apple-system, sans-serif;
	}
	.stats {
	display: inline-block;
	background: #f5f5f5;
	padding: 2px 8px;
	margin: 0 4px;
	font-size: 0.8em;
	border-radius: 2px;
	}
	input[type="text"] {
	width: 100%;
	padding: 12px;
	font-size: 15px;
	font-family: -apple-system, sans-serif;
	border: 1px solid #d0d0d0;
	margin-bottom: 12px;
	background: #fafafa;
	}
	input[type="text"]:focus {
	outline: none;
	border-color: #1a1a1a;
	background: white;
	}
	button {
	width: 100%;
	padding: 12px;
	font-size: 15px;
	font-family: 'Courier New', monospace;
	background: #1a1a1a;
	color: white;
	border: none;
	cursor: pointer;
	transition: background 0.2s;
	}
	button:hover {
	background: #333;
	}
	#result {
	margin-top: 30px;
	padding: 20px;
	background: #f9f9f9;
	border-left: 3px solid #1a1a1a;
	display: none;
	font-family: -apple-system, sans-serif;
	}
	.answer {
	font-size: 2em;
	font-weight: 300;
	margin-bottom: 8px;
	font-family: 'Courier New', monospace;
	}
	.confidence {
	font-size: 0.9em;
	color: #666;
	}
	.footer {
	margin-top: 40px;
	padding-top: 20px;
	border-top: 1px solid #e0e0e0;
	font-size: 0.8em;
	color: #999;
	font-family: -apple-system, sans-serif;
	}
	.emoji {
	font-size: 2em;
	margin-bottom: 10px;
	}
	.timing {
	margin-top: 10px;
	font-size: 0.75em;
	color: #aaa;
	font-family: 'Courier New', monospace;
	}
	</style>
	</head>
	<body>
	<div class="container">
	<h1>atacama</h1>
	<p class="subtitle">
	An ultra-small language model
	<span class="stats">7,762 parameters</span>
	<span class="stats">30KB</span>
	<span class="stats">99.9% certain</span>
	</p>

	<input type="text" id="question" placeholder="is it raining in atacama?"
	value="is it raining in atacama?">
	<button onclick="askOracle()">ask</button>

	<div id="result"></div>

	<div class="footer">
	trained on 50+ years of atacama desert weather data<br>
	last recorded rainfall: march 2015
	</div>
	</div>

	<script>
	async function askOracle() {
	const question = document.getElementById('question').value;
	const resultDiv = document.getElementById('result');

	resultDiv.style.display = 'block';
	resultDiv.innerHTML = '<p>Consulting the oracle...</p>';

	const startTime = performance.now();

	try {
	const response = await fetch('/ask', {
	method: 'POST',
	headers: {'Content-Type': 'application/json'},
	body: JSON.stringify({question: question})
	});

	const endTime = performance.now();
	const totalTime = ((endTime - startTime) / 1000).toFixed(2);

	const data = await response.json();

	const emoji = data.prob_no_rain > 0.999 ? '☀️' : '🌤️';

	resultDiv.innerHTML = `
	<div class="emoji">${emoji}</div>
	<div class="answer">${data.answer}</div>
	<div class="confidence">${data.confidence}</div>
	<div class="confidence" style="margin-top: 10px; font-size: 0.9em;">
	No rain: ${(data.prob_no_rain * 100).toFixed(2)}% |
	Rain: ${(data.prob_rain * 100).toFixed(2)}%
	</div>
	<div class="timing">
	⏱️ total: ${totalTime}s | server inference: ${data.inference_ms}ms
	</div>
	`;
	} catch (error) {
	resultDiv.innerHTML = '<p>Error: Could not reach the oracle</p>';
	}
	}

	// Allow Enter key to submit
	document.getElementById('question').addEventListener('keypress', function(e) {
	if (e.key === 'Enter') askOracle();
	});
	</script>
	</body>
	</html>
	"""

	@app.route('/')
	def home():
	    """Serve the single-page web interface built from ``HTML_TEMPLATE``."""
	    page = render_template_string(HTML_TEMPLATE)
	    return page

	@app.route('/ask', methods=['POST'])
	def ask():
	    """Answer a question posted as JSON and report per-stage timing.

	    Expects a JSON body of the form ``{"question": "<text>"}``. A missing,
	    malformed, or non-object body is treated as an empty question, and a
	    non-string question is converted with ``str`` (``null`` becomes empty),
	    so the handler answers instead of failing.

	    Returns:
	        A JSON response with the keys ``answer`` and ``confidence``
	        (display strings), ``prob_no_rain`` and ``prob_rain`` (softmax
	        probabilities as floats), ``inference_ms`` (time spent in this
	        handler, formatted in milliseconds) and ``debug`` (per-stage timing).
	    """
	    request_start = time.time()

	    # silent=True yields None for an unparsable or non-JSON body instead of aborting.
	    data = request.get_json(silent=True)
	    if not isinstance(data, dict):
	        data = {}
	    question = data.get('question', '')
	    if question is None:
	        question = ''
	    elif not isinstance(question, str):
	        # The tokenizer slices and iterates its input, so it needs a string.
	        question = str(question)

	    # Ask the oracle with granular timing
	    t0 = time.time()
	    # unsqueeze(0) adds the batch axis the model's batch_first LSTM expects.
	    tokens = tokenizer.encode(question).unsqueeze(0)
	    t1 = time.time()

	    with torch.no_grad():
	        logits = model(tokens)
	    t2 = time.time()

	    probs = torch.softmax(logits, dim=1)[0]
	    t3 = time.time()

	    # Index 0 is read as "no rain" and index 1 as "rain".
	    prob_no_rain = probs[0].item()
	    prob_rain = probs[1].item()
	    t4 = time.time()

	    # Map the "no rain" probability onto a display answer, strictest threshold first.
	    if prob_no_rain > 0.999:
	        answer = "No."
	        confidence = "Absolute certainty"
	    elif prob_no_rain > 0.99:
	        answer = "No. (But I admire your optimism)"
	        confidence = "Very high confidence"
	    elif prob_no_rain > 0.9:
	        answer = "Almost certainly not."
	        confidence = "High confidence"
	    else:
	        answer = "Historically unprecedented... but no."
	        confidence = "Moderate confidence"

	    total_time = time.time() - request_start

	    # Stage durations are differences of time.time() in seconds; multiply by 1000 for milliseconds.
	    tokenize_ms = (t1 - t0) * 1000
	    model_ms = (t2 - t1) * 1000
	    softmax_ms = (t3 - t2) * 1000
	    extract_ms = (t4 - t3) * 1000

	    # Log granular timing to server console
	    print(f"TIMING: tokenize={tokenize_ms:.1f}ms, model={model_ms:.1f}ms, softmax={softmax_ms:.1f}ms, extract={extract_ms:.1f}ms, total={total_time*1000:.1f}ms")

	    return jsonify({
	        'answer': answer,
	        'confidence': confidence,
	        'prob_no_rain': prob_no_rain,
	        'prob_rain': prob_rain,
	        'inference_ms': f"{total_time*1000:.1f}",
	        'debug': f"tok={tokenize_ms:.0f}ms model={model_ms:.0f}ms soft={softmax_ms:.0f}ms"
	    })

	@app.route('/health')
	def health():
	    """Return a fixed JSON status payload; suitable as a liveness / keep-warm probe."""
	    payload = {'status': 'ok', 'model': 'loaded'}
	    return jsonify(payload)

	if __name__ == '__main__':
	    # Listen on all interfaces; the port comes from the PORT environment
	    # variable and defaults to 5000. `os` is already imported at module level.
	    app.run(host='0.0.0.0', port=int(os.getenv('PORT', 5000)))