import os
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Pick ONE GGUF repo + filename that exists in that repo.
# Examples you can use:
#   - repo_id="bartowski/gemma-2-2b-it-GGUF"     (various quants)
#   - repo_id="BafS/gemma-2-2b-it-Q4_K_M-GGUF"   (single-file Q4_K_M)
REPO_ID = os.getenv("GGUF_REPO_ID", "BafS/gemma-2-2b-it-Q4_K_M-GGUF")
FILENAME = os.getenv("GGUF_FILENAME", "gemma-2-2b-it-q4_k_m.gguf")  # adjust if the repo uses a different name

# Optional: HF_TOKEN is needed if the repo is gated.
HF_TOKEN = os.getenv("HF_TOKEN")
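# Note: the Space also needs a requirements.txt next to app.py for the three
# imports above. A minimal one (package names only; pin versions as needed):
#
#     gradio
#     huggingface_hub
#     llama-cpp-python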
llm = None

def _load_model():
    """Lazily download and load the GGUF model on the first request,
    so the Space starts fast and only pays the download cost once."""
    global llm
    if llm is not None:
        return
    model_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        token=HF_TOKEN,  # required if the repo is gated; harmless if public
    )
    # Conservative defaults for the HF CPU Basic tier
    llm = Llama(
        model_path=model_path,
        n_ctx=2048,
        n_threads=max(1, os.cpu_count() or 2),
        n_batch=256,
        verbose=False,
    )
SYSTEM_PROMPT = (
    "You are a product delivery analyst. "
    "Write a concise executive summary in English based ONLY on the data provided. "
    "Be specific with numbers and percentages. Do not invent data."
)
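# Hypothetical shape of the input (illustrative only; the model receives it
# as plain text, so any JSON or free-form period summary works):
#
#     {
#       "period": "2024-Q3",
#       "planned_points": 120,
#       "delivered_points": 96,
#       "scope_churn_pct": 18,
#       "hotspots": ["checkout-service", "billing-worker"]
#     }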
def summarize(period_json: str, max_tokens: int = 350, temperature: float = 0.2):
    _load_model()
    # Gemma 2 instruction-tuned models use <start_of_turn>/<end_of_turn>
    # chat markers, not generic <system>/<user>/<assistant> tags. There is
    # no separate system role, so the system prompt goes into the user turn.
    prompt = f"""<start_of_turn>user
{SYSTEM_PROMPT}

Output format:
1) Overall health (1 sentence)
2) Capacity vs scope (2 bullets)
3) Delivery & predictability (2 bullets)
4) Churn & stability (2 bullets)
5) Risks / hotspots (2 bullets, name components)
6) Recommendation for next period (2 bullets)

Data:
{period_json}<end_of_turn>
<start_of_turn>model
"""
    out = llm(
        prompt,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=0.9,
        stop=["<end_of_turn>"],  # matches the Gemma 2 turn delimiter above
    )
    return out["choices"][0]["text"].strip()
with gr.Blocks() as demo:
    gr.Markdown("## Gemma 2B (GGUF) – Executive Summary API")
    inp = gr.Textbox(label="Period summary JSON / text", lines=12, placeholder="{ ... }")
    max_t = gr.Slider(64, 700, value=350, step=1, label="max_tokens")
    temp = gr.Slider(0.0, 1.0, value=0.2, step=0.05, label="temperature")
    out = gr.Textbox(label="Summary (EN)", lines=14)
    btn = gr.Button("Summarize")
    btn.click(summarize, inputs=[inp, max_t, temp], outputs=out, api_name="summarize")

demo.launch()
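# Because btn.click is registered with api_name="summarize", the endpoint can
# also be called programmatically. A minimal sketch, assuming a public Space
# named "your-user/your-space" (swap in your actual Space id):
#
#     from gradio_client import Client
#
#     client = Client("your-user/your-space")
#     summary = client.predict(
#         '{"period": "2024-Q3", "planned_points": 120}',  # period_json
#         350,   # max_tokens
#         0.2,   # temperature
#         api_name="/summarize",
#     )
#     print(summary)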