| from typing import Optional | |
| import json | |
| import os | |
| import httpx | |
| from fastapi import HTTPException | |
| from fastapi.responses import StreamingResponse | |
| from config import ( | |
| OLLAMA_BASE_URL, DEFAULT_MODEL, AVAILABLE_MODELS, | |
| MAX_SUMMARY_TOKENS, TEMPERATURE, | |
| ) | |
def build_prompt(title: Optional[str], text: str) -> str:
    """Build the single-sentence summarization prompt sent to Ollama.

    Args:
        title: Optional article title. When present and phrased as a
            question, the model is instructed to answer it directly;
            otherwise it writes a one-sentence overview.
        text: The article body to summarize.

    Returns:
        The full prompt string, ending in "Summary:" so the model's
        continuation is the summary itself.
    """
    if title:
        instructions = (
            f'The article is titled "{title}". '
            "If the title is a question, answer it directly in one sentence using only facts from the article. "
            "If the title is not a question, write one sentence that gives a concise, high-level overview "
            "of the article, briefly enumerating all key facts."
        )
    else:
        instructions = (
            "Write one sentence that gives a concise, high-level overview of the article, "
            "briefly enumerating all key facts."
        )
    # Fixes vs. previous version: the "filler phrases" sentence was split by a
    # stray period+newline before "or any similar phrasing"; a missing newline
    # glued "point." onto "Output the summary..."; "characetrs" typo corrected.
    return (
        f"{instructions}\n"
        "Do not add opinions, commentary, or filler phrases like 'The article discusses' or 'This document provides', "
        "or any similar phrasing, whether the similarity be in meaning or otherwise. Get straight to the point.\n"
        "Output the summary sentence only. The sentence should be no longer than 200 characters long. Nothing else should be included.\n\n"
        f"Article:\n{text}\n\n"
        "Summary:"
    )
def resolve_model(model: Optional[str]) -> str:
    """Resolve which model name to use for a request.

    Preference order: the set of models actually installed in Ollama
    (queried live via ``/api/tags``), falling back to the static
    ``AVAILABLE_MODELS`` allowlist when Ollama cannot be reached.

    Raises:
        HTTPException: 400 when the requested model is neither installed
            nor in the configured allowlist.
    """
    requested = model or ""

    # Ask Ollama what is installed; any failure just means "unknown".
    try:
        with httpx.Client(timeout=5.0) as client:
            resp = client.get(f"{OLLAMA_BASE_URL}/api/tags")
            resp.raise_for_status()
            data = resp.json() if resp.content else {}
            installed = [
                entry.get("name")
                for entry in data.get("models", [])
                if entry.get("name")
            ]
    except Exception:
        installed = []

    if installed:
        if not requested:
            # Prefer the configured default when it is installed.
            return DEFAULT_MODEL if DEFAULT_MODEL in installed else installed[0]
        if requested in installed:
            return requested
        raise HTTPException(
            status_code=400,
            detail=(
                f"Model '{requested}' is not installed in Ollama. "
                f"Installed: {installed}. Run `ollama pull {requested}`."
            ),
        )

    # Ollama unreachable: validate against the configured allowlist instead.
    if not requested:
        return DEFAULT_MODEL
    if requested in AVAILABLE_MODELS:
        return requested
    raise HTTPException(
        status_code=400,
        detail=f"Unknown model '{requested}'. Available: {AVAILABLE_MODELS}",
    )
def ensure_ollama_reachable() -> None:
    """Fail fast with a 503 if the Ollama server cannot be reached.

    Performs a quick GET against ``/api/tags`` as a health probe.

    Raises:
        HTTPException: 503 when the connection is refused or Ollama
            returns an HTTP error.
    """
    try:
        with httpx.Client(timeout=10.0) as client:
            response = client.get(f"{OLLAMA_BASE_URL}/api/tags")
            response.raise_for_status()
    except httpx.ConnectError as exc:
        # Chain the cause (`from exc`) so server logs keep the underlying
        # connection error instead of losing it behind the HTTPException.
        raise HTTPException(
            status_code=503,
            detail="Cannot reach Ollama. Make sure `ollama serve` is running.",
        ) from exc
    except httpx.HTTPError as exc:
        raise HTTPException(
            status_code=503,
            detail=f"Ollama responded with an error: {exc}",
        ) from exc
async def ollama_stream(prompt: str, model: str):
    """Async generator: yields NDJSON lines from Ollama, filtering out thinking-only chunks.

    Forwards chunks that carry visible output (``response``), an in-stream
    ``error``, or the final ``done`` marker; drops chunks containing only
    hidden "thinking" tokens. Transport failures (connect/timeout/HTTP)
    are reported in-band as a single NDJSON error object so the client
    stream always terminates cleanly.
    """
    keep_alive = os.getenv("OLLAMA_KEEP_ALIVE", "30m")
    # Set num_predict high so thinking tokens don't limit output.
    num_predict = MAX_SUMMARY_TOKENS * 3
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": True,
        "keep_alive": keep_alive,
        "options": {
            "num_predict": num_predict,
            "temperature": TEMPERATURE,
            "stop": ["Article:", "Title:"],
        },
    }
    async with httpx.AsyncClient(timeout=300.0) as client:
        try:
            async with client.stream(
                "POST", f"{OLLAMA_BASE_URL}/api/generate", json=payload,
            ) as resp:
                resp.raise_for_status()
                async for line in resp.aiter_lines():
                    if not line:
                        continue
                    try:
                        chunk = json.loads(line)
                    except json.JSONDecodeError:
                        # Not JSON — pass through untouched.
                        yield line + "\n"
                        continue
                    # Forward real output, Ollama's in-stream errors, and the
                    # final done/stats chunk; skip thinking-only chunks.
                    # (Previously `error` and `done` chunks were silently
                    # dropped because only `response` was checked.)
                    if chunk.get("response") or chunk.get("error") or chunk.get("done"):
                        yield line + "\n"
        except httpx.ConnectError:
            error_line = json.dumps({
                "error": "Cannot reach Ollama. Make sure `ollama serve` is running.",
            })
            yield error_line + "\n"
        except httpx.TimeoutException:
            error_line = json.dumps({
                "error": "Ollama timed out. The model may still be loading — try again in a moment.",
            })
            yield error_line + "\n"
        except httpx.HTTPError as exc:
            error_line = json.dumps({
                "error": f"Ollama error: {exc}",
            })
            yield error_line + "\n"
def stream_summary(
    text: str,
    title: Optional[str] = None,
    model: Optional[str] = None,
) -> StreamingResponse:
    """Universal funnel: text -> prompt -> Ollama stream -> NDJSON response."""
    # Probe Ollama first so callers get a clean 503 instead of a broken stream.
    ensure_ollama_reachable()
    chosen_model = resolve_model(model)
    summary_prompt = build_prompt(title, text)
    # X-Accel-Buffering: no — keeps reverse proxies from buffering the stream.
    response = StreamingResponse(
        ollama_stream(summary_prompt, chosen_model),
        media_type="application/x-ndjson",
        headers={"X-Accel-Buffering": "no"},
    )
    return response