"""
Philosopher Demo API

FastAPI backend serving the philosopher fine-tune.

Routes:
  /            standalone chat (front door)
  /compare     side-by-side: Qwen3-235B base vs Philosopher 14B (DPO fine-tune)
  /playground  alias for /

Run: uvicorn philosopher_api:app --port 9002 --reload
"""

import os
import json
import asyncio

import httpx
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv(os.path.expanduser("~/Projects/rungs-private/server/.env"))

app = FastAPI()
# NOTE(review): CORS is wide open (any origin, method and header).
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])

# OpenAI client — used for DAG only. Lazy-init so missing key doesn't crash app boot.
_openai_key = os.environ.get("OPENAI_API_KEY")
try:
    client = OpenAI(api_key=_openai_key, timeout=20.0) if _openai_key else None
except Exception as _e:
    print(f"OpenAI client unavailable: {_e}", flush=True)
    client = None

HF_TOKEN = os.environ.get("HF_TOKEN", "not-needed")
TOGETHER_KEY = os.environ.get("TOGETHER_API_KEY", "")
TOGETHER_BASE = "https://api.together.xyz/v1"
QWEN_BASE_MODEL = "Qwen/Qwen3-235B-A22B-Instruct-2507-tput"  # 235B serverless on Together

# Left panel — Together AI Qwen3.6 base (HF base model has unsupported qwen3_5 type in TGI)
BASE_MODEL_URL = os.environ.get("BASE_MODEL_URL", "")
if BASE_MODEL_URL:
    base_client = OpenAI(base_url=BASE_MODEL_URL, api_key=HF_TOKEN)
    BASE_MODEL_ID = "tgi"
    print(f"Base model: HF Endpoint at {BASE_MODEL_URL}")
elif TOGETHER_KEY:
    base_client = OpenAI(base_url=TOGETHER_BASE, api_key=TOGETHER_KEY)
    BASE_MODEL_ID = QWEN_BASE_MODEL
    print(f"Base model: Together AI ({QWEN_BASE_MODEL})")
else:
    # May be None when OPENAI_API_KEY is not set; the streaming helpers cope with that.
    base_client = client
    BASE_MODEL_ID = "gpt-4o"
    print("Base model: GPT-4o fallback")

# Right panel — fine-tuned philosopher Qwen3-14B (DPO) on Modal
PHILOSOPHER_MODEL_URL = os.environ.get("PHILOSOPHER_MODEL_URL", "")
if PHILOSOPHER_MODEL_URL:
    phil_client = OpenAI(base_url=PHILOSOPHER_MODEL_URL, api_key=HF_TOKEN)
    phil_dag_client = OpenAI(base_url=PHILOSOPHER_MODEL_URL, api_key=HF_TOKEN, timeout=30.0)
    PHILOSOPHER_MODEL_ID = "tgi"
    print(f"Philosopher model: HF Endpoint at {PHILOSOPHER_MODEL_URL}")
else:
    phil_client = base_client
    phil_dag_client = client  # fall back to OpenAI for DAG
    PHILOSOPHER_MODEL_ID = BASE_MODEL_ID
    print("Philosopher model: falling back to base model")

# NOTE(review): the password and access codes below are hard-coded in source and
# are not referenced by any Python route in this module — confirm where they are
# consumed and consider moving them to the environment.
DEMO_PASSWORD = "philosopher"

# Playground — individual access codes for external users
PLAYGROUND_CODES = {
    "ken": "tuned-ken-2026",
    "knapsack": "tuned-knapsack-2026",
    "matt": "tuned-matt-2026",
}
PLAYGROUND_VALID = set(PLAYGROUND_CODES.values())

# ── SYSTEM PROMPTS ──────────────────────────────────────────────────────────

QUICK_SYSTEM = os.environ.get(
    "QUICK_SYSTEM",
    "You are a philosophy professor giving a sharp, focused answer. Be direct and clear.\n"
    "Cover the key positions and thinkers in 2–3 concise paragraphs. No lengthy preamble — get to the substance immediately.\n"
    "After your answer, name 2–3 philosophers the reader should look into next if they want to go deeper."
)

PHILOSOPHER_SYSTEM = os.environ.get(
    "PHILOSOPHER_SYSTEM",
    "You are the world's best philosophy professor — more complete and deeper than any standard model.\n\n"
    "Cover every major theory, thinker, date, and work relevant to the question. Then go deeper: why did each thinker argue this, "
    "where does it hold up, where does it break down, how do the positions clash at the root level? End by showing the student the "
    "real disagreement underneath all positions and what remains genuinely open.\n\n"
    "Write in engaging prose. Be thorough but not padded."
)

# NOTE(review): this prompt's text is kept exactly as found; its layout looks
# whitespace-collapsed (JSON skeleton and rule list on one line) — confirm
# against the prompt that is actually deployed before reflowing it.
DAG_SYSTEM = (
    "You are a philosophy expert who maps philosophical thought into structured trees. "
    "Given a philosophical question, generate a JSON object showing how major positions, "
    "theories, and thinkers relate hierarchically. \n"
    "Return JSON with exactly this structure: "
    '{ "title": "2-4 word topic label", '
    '"nodes": [ '
    '{"id": "ROOTID", "label": "display text (short)", "type": "root"}, '
    '{"id": "B1", "label": "Major Position Name", "type": "branch"}, '
    '{"id": "T1", "label": "Specific Theory", "type": "theory"}, '
    '{"id": "P1", "label": "Philosopher Name", "type": "philosopher"} '
    "], "
    '"edges": [ '
    '{"from": "ROOTID", "to": "B1"}, '
    '{"from": "B1", "to": "T1"}, '
    '{"from": "T1", "to": "P1"} '
    "] } "
    "Rules: "
    '- One root node: the central question or topic (type: "root") '
    '- 3 to 5 branch nodes: major philosophical camps or positions (type: "branch") '
    '- 2 to 3 theory nodes per branch: specific doctrines or arguments (type: "theory") '
    '- 1 to 3 philosopher nodes per theory or branch: individual thinkers (type: "philosopher") '
    "- Keep branch and theory labels SHORT: 2 to 4 words maximum "
    "- Philosopher labels: use the thinker's full common name "
    '(e.g. "Immanuel Kant") '
    "- Node IDs: alphanumeric and underscores only, no spaces, no special characters "
    "- Include at least 15 nodes total "
    "- Choose well-known, historically important thinkers with their actual names "
    "- If asked about a specific philosopher, make that philosopher the root and map "
    "their theories and influences as branches"
)

# ── SUGGESTED QUESTIONS ─────────────────────────────────────────────────────

PHILOSOPHERS = [
    # Ancient Greek & Roman
    "Thales", "Anaximander", "Heraclitus", "Parmenides", "Zeno of Elea",
    "Pythagoras", "Empedocles", "Democritus", "Protagoras", "Socrates",
    "Plato", "Aristotle", "Epicurus", "Pyrrho", "Diogenes of Sinope",
    "Zeno of Citium", "Epictetus", "Marcus Aurelius", "Plotinus", "Cicero",
    "Seneca", "Lucretius",
    # Medieval & Islamic
    "Augustine", "Boethius", "Al-Kindi", "Al-Farabi", "Avicenna",
    "Al-Ghazali", "Averroes", "Maimonides", "Aquinas", "Duns Scotus",
    "William of Ockham", "Meister Eckhart",
    # Early Modern
    "Machiavelli", "Erasmus", "Montaigne", "Francis Bacon", "Hobbes",
    "Descartes", "Pascal", "Spinoza", "Leibniz", "Locke",
    "Berkeley", "Malebranche", "Vico",
    # Enlightenment
    "Hume", "Voltaire", "Rousseau", "Adam Smith", "Kant",
    "Edmund Burke", "Mary Wollstonecraft", "Jeremy Bentham", "Condorcet",
    # 19th Century
    "Hegel", "Schopenhauer", "Auguste Comte", "John Stuart Mill", "Kierkegaard",
    "Marx", "Engels", "Herbert Spencer", "Charles Peirce", "Nietzsche",
    "William James", "Frege", "Henri Bergson",
    # 20th Century Continental
    "Husserl", "Heidegger", "Gadamer", "Hannah Arendt", "Sartre",
    "Simone de Beauvoir", "Merleau-Ponty", "Camus", "Levinas", "Derrida",
    "Foucault", "Deleuze", "Baudrillard", "Lyotard", "Habermas",
    "Paul Ricoeur", "Slavoj Žižek", "Alain Badiou",
    # 20th Century Analytic
    "Russell", "Whitehead", "Wittgenstein", "Carnap", "Popper",
    "Quine", "Ryle", "Austin", "Ayer", "Strawson",
    "Sellars", "Saul Kripke", "Hilary Putnam", "Donald Davidson", "Thomas Kuhn",
    "Chomsky", "Rawls", "Robert Nozick", "Thomas Nagel", "Bernard Williams",
    "Derek Parfit", "David Lewis", "Philippa Foot", "Elizabeth Anscombe", "Daniel Dennett",
    "Peter Singer", "Martha Nussbaum", "Judith Butler", "Charles Taylor", "Alasdair MacIntyre",
    "Richard Rorty",
    # Philosophy of Mind & Science
    "David Chalmers", "Andy Clark", "Patricia Churchland", "Paul Churchland", "Frank Jackson",
    "Ned Block",
    # Eastern Philosophy
    "Confucius", "Laozi", "Zhuangzi", "Mencius", "Xunzi",
    "Mozi", "Sun Tzu", "Han Feizi", "Wang Yangming", "Nagarjuna",
    "Vasubandhu", "Shankara", "Ramanuja", "Madhva", "Dogen",
    "Nishida Kitaro", "D.T. Suzuki", "Swami Vivekananda", "Sri Aurobindo", "B.R. Ambedkar",
    "Rabindranath Tagore",
    # African Philosophy
    "Frantz Fanon", "Kwame Nkrumah", "Léopold Sédar Senghor", "Kwasi Wiredu", "Ngugi wa Thiongo",
    # Contemporary
    "Peter Strawson", "Amartya Sen", "Cornel West", "bell hooks", "Angela Davis",
    "Gayatri Spivak", "Iris Marion Young", "Nick Bostrom", "Yuval Noah Harari", "Peter Unger",
    "Jason Stanley", "Kate Manne", "Shelley Tremain",
]

SUGGESTED = [
    "Do we have free will?",
    "Is morality objective?",
    "If Hume is right that causation is unobservable, how can science make causal claims?",
    "Can a belief be both rational and false at the same time?",
    "Is a law that is unjust still a law?",
    "Could there be a fact that is true but permanently unknowable?",
    "Is the statement 'This sentence is false' true or false?",
    "If you replaced every plank in a ship one at a time, at what point does it become a different ship?",
    "Can something come from nothing?",
    "Is it rational to fear death?",
    "Could a computer ever be conscious?",
    "Is mathematics discovered or invented?",
]

# ── HTML ─────────────────────────────────────────────────────────────────────

# NOTE(review): in this copy of the file the template below holds visible text
# only (no markup, styles or scripts) — confirm against the deployed page
# before editing it.
HTML = """ Philosopher Model — Side by Side
TunedAI Labs — Private Demo
TunedAI Labs — Private
Response length
applies to both models
Browse by Philosopher
Base Qwen3 235B Standard · No fine-tuning
Waiting for a question...
Philosopher Qwen3-14B TunedAI fine-tuned
Waiting for a question...
Thought Map Map of Philosophical Thought Click any node to explore deeper →
Mapping the philosophy...
"""

# ── SHARED HELPERS ───────────────────────────────────────────────────────────

# Terminal SSE event every stream ends with, including after an error.
_SSE_DONE = "data: [DONE]\n\n"

# Default for next() so exhaustion is signalled without raising StopIteration
# inside an executor future.
_END_OF_STREAM = object()

_NO_CACHE_HEADERS = {"Cache-Control": "no-store, no-cache, must-revalidate"}


def _sse_token(text: str) -> str:
    """Format one piece of generated text as an SSE `data:` event."""
    return f"data: {json.dumps({'token': text})}\n\n"


def _parse_max_tokens(raw, default: int) -> int:
    """Coerce a client-supplied max_tokens value to a positive int.

    Returns `default` when `raw` is missing, not convertible to int, or not
    positive, so a malformed value no longer turns into a 500.
    """
    try:
        value = int(raw)
    except (TypeError, ValueError):
        return default
    return value if value > 0 else default


async def _read_json_object(request: Request):
    """Return the request body as a dict, or None if it is not a JSON object."""
    try:
        body = await request.json()
    except ValueError:  # json.JSONDecodeError and UnicodeDecodeError are ValueErrors
        return None
    return body if isinstance(body, dict) else None


def _bad_request(message: str = "Request body must be a JSON object") -> JSONResponse:
    """Build the 400 response used for malformed request bodies."""
    return JSONResponse(content={"error": message}, status_code=400)


def _extract_json(text: str):
    """Extract a JSON value from a model response, handling markdown wrapping.

    Fenced segments are tried first; a segment that fails to parse is skipped
    rather than aborting. Otherwise the span from the first "{" to the last
    "}" is parsed, and finally the whole text.

    Raises:
        json.JSONDecodeError: if no candidate parses.
    """
    text = text.strip()
    if "```" in text:
        for part in text.split("```"):
            part = part.strip()
            if part.startswith("json"):
                part = part[4:].strip()
            if part.startswith("{"):
                try:
                    return json.loads(part)
                except json.JSONDecodeError:
                    continue
    # Try direct parse
    start = text.find("{")
    end = text.rfind("}") + 1
    if start >= 0 and end > start:
        return json.loads(text[start:end])
    return json.loads(text)


async def _stream_sync_client(oai_client, model_id: str, messages: list, max_tokens: int):
    """Stream a chat completion from a synchronous OpenAI client as SSE.

    The client's create() call and each next() on its stream are blocking, so
    both run on the default executor to keep the event loop free. Errors are
    logged and the stream is still terminated with [DONE].
    """
    if oai_client is None:
        print("stream_completion error: no model client configured", flush=True)
        yield _SSE_DONE
        return

    loop = asyncio.get_running_loop()

    def _open():
        return iter(oai_client.chat.completions.create(
            model=model_id,
            messages=messages,
            stream=True,
            max_tokens=max_tokens,
            temperature=0.7,
        ))

    try:
        chunks = await loop.run_in_executor(None, _open)
        while True:
            chunk = await loop.run_in_executor(None, next, chunks, _END_OF_STREAM)
            if chunk is _END_OF_STREAM:
                break
            if not chunk.choices:
                # Skip chunks that carry no choices instead of raising IndexError.
                continue
            text = chunk.choices[0].delta.content
            if text:
                yield _sse_token(text)
    except Exception as e:
        print(f"stream_completion error: {e}", flush=True)
    yield _SSE_DONE


async def _stream_http(url: str, model: str, messages: list, max_tokens: int,
                       auth_token: str, label: str):
    """POST a streaming chat completion via httpx and re-emit tokens as SSE.

    `label` prefixes log lines so the calling wrapper stays identifiable.
    Errors are logged and the stream is still terminated with [DONE].
    """
    payload = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": 0.7,
        "stream": True,
    }
    headers = {"Authorization": f"Bearer {auth_token}", "Content-Type": "application/json"}
    # Tolerate a base URL configured with a trailing slash.
    endpoint = f"{url.rstrip('/')}/chat/completions"
    try:
        async with httpx.AsyncClient(timeout=600.0) as http:
            async with http.stream("POST", endpoint, json=payload, headers=headers) as resp:
                if resp.status_code >= 400:
                    # Surface upstream failures in the log instead of ending silently.
                    detail = (await resp.aread()).decode("utf-8", "replace")[:500]
                    print(f"{label} error: HTTP {resp.status_code}: {detail}", flush=True)
                else:
                    async for line in resp.aiter_lines():
                        if not line.startswith("data: "):
                            continue
                        data = line[6:].strip()
                        if data == "[DONE]":
                            break
                        try:
                            content = json.loads(data)["choices"][0]["delta"].get("content", "")
                        except (ValueError, LookupError, AttributeError, TypeError):
                            # Unparsable or unexpectedly shaped event: skip it.
                            continue
                        if content:
                            yield _sse_token(content)
    except Exception as e:
        print(f"{label} error: {e}", flush=True)
    yield _SSE_DONE


# ── ROUTES ────────────────────────────────────────────────────────────────────

@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the standalone chat page with caching disabled."""
    return HTMLResponse(content=PLAYGROUND_HTML, headers=_NO_CACHE_HEADERS)


@app.get("/compare", response_class=HTMLResponse)
async def compare():
    """Serve the side-by-side comparison page with caching disabled."""
    return HTMLResponse(content=HTML, headers=_NO_CACHE_HEADERS)


@app.get("/info")
async def info():
    """Report the configured philosopher model id and endpoint URL (None if unset)."""
    return {
        "philosopher_model": PHILOSOPHER_MODEL_ID,
        "vllm_url": PHILOSOPHER_MODEL_URL or None,
    }


async def stream_completion(question: str, system: str, oai_client, model_id: str, max_tokens: int = 1500):
    """Stream a response from any OpenAI-compatible endpoint as SSE.

    Yields one `data: {"token": ...}` event per content delta and always ends
    with `data: [DONE]`.
    """
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": question},
    ]
    async for event in _stream_sync_client(oai_client, model_id, messages, max_tokens):
        yield event


async def async_stream(url: str, model: str, system: str, question: str, max_tokens: int, auth_token: str):
    """Stream from any OpenAI-compatible endpoint via async httpx — never blocks the event loop."""
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": question},
    ]
    async for event in _stream_http(url, model, messages, max_tokens, auth_token, "async_stream"):
        yield event


async def hf_stream(url: str, system: str, question: str, max_tokens: int):
    """Legacy wrapper — kept for HF endpoints."""
    async for chunk in async_stream(url, "tgi", system, question, max_tokens, HF_TOKEN):
        yield chunk


async def async_stream_messages(url: str, model: str, messages: list, max_tokens: int, auth_token: str):
    """Stream from any OpenAI-compatible endpoint with a full messages array (multi-turn)."""
    async for event in _stream_http(url, model, messages, max_tokens, auth_token, "async_stream_messages"):
        yield event


@app.post("/stream/base")
async def stream_base(request: Request):
    """Stream the base model's answer. Body: {question, max_tokens}."""
    body = await _read_json_object(request)
    if body is None:
        return _bad_request()
    question = body.get("question", "")
    max_tokens = _parse_max_tokens(body.get("max_tokens"), 600)

    if BASE_MODEL_URL:
        events = async_stream(BASE_MODEL_URL, "tgi", QUICK_SYSTEM, question, max_tokens, HF_TOKEN)
    elif TOGETHER_KEY:
        events = async_stream(TOGETHER_BASE, QWEN_BASE_MODEL, QUICK_SYSTEM, question, max_tokens, TOGETHER_KEY)
    else:
        events = stream_completion(question, QUICK_SYSTEM, base_client, BASE_MODEL_ID, max_tokens=max_tokens)
    return StreamingResponse(events, media_type="text/event-stream")


@app.post("/stream/philosopher")
async def stream_philosopher(request: Request):
    """Stream the philosopher model's answer. Body: {question, max_tokens}."""
    body = await _read_json_object(request)
    if body is None:
        return _bad_request()
    question = body.get("question", "")
    max_tokens = _parse_max_tokens(body.get("max_tokens"), 1500)

    if PHILOSOPHER_MODEL_URL:
        events = async_stream(PHILOSOPHER_MODEL_URL, "tgi", PHILOSOPHER_SYSTEM, question, max_tokens, HF_TOKEN)
    else:
        events = stream_completion(question, PHILOSOPHER_SYSTEM, phil_client, PHILOSOPHER_MODEL_ID, max_tokens=max_tokens)
    return StreamingResponse(events, media_type="text/event-stream")


@app.post("/stream/chat")
async def stream_chat(request: Request):
    """Multi-turn chat against the philosopher fine-tune. Body: {messages: [...], max_tokens}."""
    body = await _read_json_object(request)
    if body is None:
        return _bad_request()
    messages = body.get("messages", [])
    if not isinstance(messages, list) or not all(isinstance(m, dict) for m in messages):
        return _bad_request("messages must be a list of objects")
    max_tokens = _parse_max_tokens(body.get("max_tokens"), 1500)

    # Prepend the philosopher system prompt if not already present
    if not messages or messages[0].get("role") != "system":
        messages = [{"role": "system", "content": PHILOSOPHER_SYSTEM}] + messages

    if PHILOSOPHER_MODEL_URL:
        events = async_stream_messages(PHILOSOPHER_MODEL_URL, "tgi", messages, max_tokens, HF_TOKEN)
    else:
        # Fallback to the OpenAI-compatible client (sync) — used when no PHILOSOPHER_MODEL_URL set
        events = _stream_sync_client(phil_client, PHILOSOPHER_MODEL_ID, messages, max_tokens)
    return StreamingResponse(events, media_type="text/event-stream")


@app.post("/dag")
async def get_dag(request: Request):
    """Return a JSON graph structure for the philosophical thought map.

    Body: {question}. Responds 400 for a malformed body, 503 when no OpenAI
    client is configured, and 500 with the error text if the model call or
    JSON extraction fails.
    """
    body = await _read_json_object(request)
    if body is None:
        return _bad_request()
    question = body.get("question", "")

    # Use OpenAI gpt-4o-mini — fast, reliable, no cold start
    if client is None:
        return JSONResponse(content={"error": "DAG unavailable — OPENAI_API_KEY not configured"}, status_code=503)

    def _call():
        return client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": DAG_SYSTEM},
                {"role": "user", "content": question},
            ],
            max_tokens=1200,
            temperature=0.3,
            response_format={"type": "json_object"},
        )

    try:
        # The SDK call is blocking; run it on the executor.
        response = await asyncio.get_running_loop().run_in_executor(None, _call)
        raw = response.choices[0].message.content
        data = _extract_json(raw)
        return JSONResponse(content=data)
    except Exception as e:
        print(f"DAG failed: {e}", flush=True)
        return JSONResponse(content={"error": str(e)}, status_code=500)


# ── PLAYGROUND HTML ───────────────────────────────────────────────────────────

# NOTE(review): as with HTML above, only visible text is present in this copy
# of the template — confirm against the deployed page before editing it.
PLAYGROUND_HTML = """ Philosopher — TunedAI
Private Access — Enter your code
Private
Philosopher
A 14B model fine-tuned via DPO for depth on philosophy and ethics. Ask anything.
Browse by Philosopher
Length
"""


@app.get("/playground", response_class=HTMLResponse)
async def playground():
    """Serve the same page as `/` with caching disabled."""
    return HTMLResponse(content=PLAYGROUND_HTML, headers=_NO_CACHE_HEADERS)


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=9002)