# app.py — Entrepreneurial Readiness Chat API (Hugging Face Space: ethnmcl)
# Origin: commit 33dc957 ("Create app.py", verified)
import os
import uuid
import json
import requests
from typing import Dict, Any, Optional
from fastapi import FastAPI, Body
from pydantic import BaseModel, Field
from fastapi.middleware.cors import CORSMiddleware
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# ---------------------------
# Config
# ---------------------------
# Base causal-LM checkpoint; overridable via env so the model can be swapped
# without a code change.
BASE_MODEL_ID = os.environ.get("BASE_MODEL_ID", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
# LoRA adapter layered onto the base model via PEFT in load_model().
ADAPTER_ID = os.environ.get("ADAPTER_ID", "ethnmcl/tinyllama-entrepreneurchatbot-lora")
# External XGBoost scoring endpoint, called once all assessment answers are in.
XGB_SCORE_URL = os.environ.get("XGB_SCORE_URL", "https://ethnmcl-EntrepreneurialReadinessScoreAPI.hf.space/score")
# System prompt prepended to every free-form chat turn in llm_reply().
SYSTEM_PROMPT = (
    "You are a concise entrepreneurial readiness assistant. "
    "Be clear, specific, and professional."
)
# Ordered questionnaire: each entry pairs a feature key (sent to the scoring
# API) with the question text shown to the user.
QUESTION_FLOW = [
    {"key": "age", "question": "What is your age? (number)"},
    {"key": "savings", "question": "How much do you currently have saved (USD)?"},
    {"key": "monthly_expense_ratio", "question": "What is your monthly expense ratio (expenses/income)? (e.g., 0.55)"},
    {"key": "sales_experience", "question": "Rate your sales experience from 0–10."},
    {"key": "dependents", "question": "How many dependents do you support? (number)"},
    {"key": "weekly_time_commitment", "question": "How many hours/week can you commit to your venture?"},
]
# Per-feature casts applied to the raw string answers before scoring.
# NOTE(review): every feature is cast to float, including integer-like counts
# ("age", "dependents") — presumably what the scoring API expects; confirm.
TYPE_CASTS = {
    "age": float,
    "savings": float,
    "monthly_expense_ratio": float,
    "sales_experience": float,
    "dependents": float,
    "weekly_time_commitment": float,
}
# Sample prompts surfaced by GET /examples for client UIs.
EXAMPLES = [
    "Can you explain what the entrepreneurial readiness check is in one or two sentences?",
    "I am 31, with $8,000 savings and an expense ratio of 0.72. What does that say about my readiness?",
    "I’m 27, working 15 hours a week on my business, with 2 dependents. How might that affect my entrepreneurial score?",
    "Inputs → age 29, savings 5000, expense ratio 0.62, sales experience 4, dependents 1, hours/week 12. Summarize likely strengths and risks in bullet points.",
    "Based on these inputs (savings 3000, expense ratio 0.85, sales exp 2), what 3 actions should I take to improve my readiness?",
    "I feel nervous about launching with only $2,000 saved. Can you give me encouragement and one practical step?",
]
# ---------------------------
# Lightweight session store
# ---------------------------
# In-memory, per-process session state keyed by an opaque hex ID.
# NOTE(review): entries are never evicted, so memory grows with every new
# session — acceptable for a demo Space, but consider a TTL for production.
SESSIONS: Dict[str, Dict[str, Any]] = {}


def new_session() -> str:
    """Create a fresh session with a blank assessment state and return its ID."""
    session_id = uuid.uuid4().hex
    SESSIONS[session_id] = {
        "answers": {},
        "idx": 0,
        "active": False,
        "scored": False,
        "score": None,
    }
    return session_id


def get_session(session_id: Optional[str]) -> str:
    """Return *session_id* when it names a live session, else mint a new one."""
    known = session_id is not None and session_id in SESSIONS
    return session_id if known else new_session()
# ---------------------------
# Model loading (GPU 4-bit if available)
# ---------------------------
def load_model():
    """Load tokenizer + base model (4-bit on CUDA) and attach the LoRA adapter.

    Sets the module-level globals ``tokenizer`` and ``model`` used by
    ``llm_reply``. If the PEFT adapter cannot be loaded, falls back to the
    bare base model and logs a warning instead of failing startup.
    """
    global tokenizer, model
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, use_fast=True)
    use_4bit = torch.cuda.is_available()
    # device_map="auto" lets accelerate place weights on the available device(s).
    kwargs = {"device_map": "auto"}
    if use_4bit:
        # 4-bit quant only when CUDA available (bitsandbytes)
        # NOTE(review): `load_in_4bit` as a direct from_pretrained kwarg is
        # deprecated in newer transformers in favor of BitsAndBytesConfig —
        # confirm against the pinned transformers version.
        kwargs.update(dict(load_in_4bit=True, torch_dtype=torch.float16))
    else:
        # CPU fallback
        kwargs.update(dict(torch_dtype=torch.float32))
    base = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, **kwargs)
    try:
        # Imported lazily so the app still starts when peft isn't installed.
        from peft import PeftModel
        model = PeftModel.from_pretrained(base, ADAPTER_ID)
    except Exception as e:
        # If adapter load fails, proceed with base (but log)
        print(f"[WARN] Failed to load PEFT adapter {ADAPTER_ID}: {e}")
        model = base
    model.eval()
    print("[INFO] Model ready. CUDA:", torch.cuda.is_available())
def llm_reply(system: str, user: str, max_new_tokens: int = 180, temperature: float = 0.2) -> str:
    """Generate one assistant reply using the TinyLlama chat-prompt format.

    Args:
        system: System-prompt text placed in the ``<|system|>`` slot.
        user: User message placed in the ``<|user|>`` slot.
        max_new_tokens: Generation length cap.
        temperature: Sampling temperature; ``0`` switches to greedy decoding.

    Returns:
        The decoded text following the final ``<|assistant|>`` marker.
    """
    prompt = f"<|system|>\n{system}\n<|user|>\n{user}\n<|assistant|>\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    gen_kwargs = dict(
        max_new_tokens=max_new_tokens,
        do_sample=temperature > 0,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Fix: only pass `temperature` when actually sampling — transformers warns
    # (and newer versions reject) sampling params combined with do_sample=False.
    if temperature > 0:
        gen_kwargs["temperature"] = temperature
    with torch.inference_mode():
        out = model.generate(**inputs, **gen_kwargs)
    text = tokenizer.decode(out[0], skip_special_tokens=True)
    # The chat markers are plain text (not special tokens), so they survive the
    # decode; keep only what follows the last assistant marker.
    return text.split("<|assistant|>")[-1].strip()
def next_question(state: Dict[str, Any]) -> Optional[str]:
    """Return the pending question text, or None when the flow is exhausted."""
    position = state["idx"]
    if position >= len(QUESTION_FLOW):
        return None
    return QUESTION_FLOW[position]["question"]
def cast_features(answers: Dict[str, str]) -> Dict[str, Any]:
    """Cast raw answer strings via TYPE_CASTS; keep the raw string on failure."""

    def _convert(key: str, raw: str) -> Any:
        # Unknown keys fall back to str, i.e. pass through unchanged.
        caster = TYPE_CASTS.get(key, str)
        try:
            return caster(raw)
        except Exception:
            # Best-effort: an unparseable answer is forwarded as-is.
            return raw

    return {key: _convert(key, raw) for key, raw in answers.items()}
def score_via_api(features: Dict[str, Any]) -> Dict[str, Any]:
    """POST features to the external scorer; return its JSON or an error dict.

    Never raises: any transport, HTTP-status, or JSON-decode failure is folded
    into an ``{"error": ...}`` payload for the caller to surface.
    """
    try:
        response = requests.post(XGB_SCORE_URL, json={"features": features}, timeout=20)
        response.raise_for_status()
        return response.json()
    except Exception as exc:
        return {"error": f"Scoring API error: {exc}"}
# ---------------------------
# FastAPI
# ---------------------------
app = FastAPI(title="Entrepreneurial Readiness Chat API", version="1.0.0")
# Wide-open CORS for browser-based clients.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# rejected by browsers per the CORS spec (credentialed requests require an
# explicit origin) — confirm whether credentials are actually needed here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
)
class ChatIn(BaseModel):
    """Request body for POST /chat."""
    message: str = Field(..., description="User message")
    session_id: Optional[str] = Field(None, description="Session ID (optional)")
class ChatOut(BaseModel):
    """Response body for POST /chat: the reply plus current assessment state."""
    session_id: str
    reply: str
    # Snapshot of the session's assessment progress (active/idx/total, ...).
    assessment: Dict[str, Any]
class StartOut(BaseModel):
    """Response body for POST /assessment/start: the first question."""
    session_id: str
    question: str
    # 1-based position of the question being asked, out of `total`.
    idx: int
    total: int
class AnswerIn(BaseModel):
    """Request body for POST /assessment/answer."""
    session_id: str
    answer: str
class AnswerOut(BaseModel):
    """Response body for POST /assessment/answer.

    While the flow is in progress, `question` carries the next prompt; once
    `done` is True, `score` carries the scoring-API result instead.
    """
    session_id: str
    done: bool
    question: Optional[str] = None
    # 1-based position of the next question (or `total` when finished).
    idx: int
    total: int
    score: Optional[Any] = None
    error: Optional[str] = None
@app.on_event("startup")
def _startup():
    """Load the LLM once at process start so the first request isn't cold.

    NOTE(review): ``on_event`` is deprecated in recent FastAPI releases in
    favor of lifespan handlers — works, but confirm the pinned version.
    """
    load_model()
@app.get("/health")
def health():
    """Liveness probe: reports OK whenever the process is serving."""
    status = {"ok": True}
    return status
@app.get("/examples")
def examples():
    """Expose the canned example prompts for client UIs."""
    payload = {"examples": EXAMPLES}
    return payload
@app.post("/assessment/start", response_model=StartOut)
def assessment_start(payload: Dict[str, Any] = Body(default={})):
    """(Re)start the questionnaire on the given session, or mint a new one."""
    sid = get_session(payload.get("session_id"))
    state = SESSIONS[sid]
    # Reset all assessment bookkeeping so a restart always begins at Q1.
    state.update({"answers": {}, "idx": 0, "active": True, "scored": False, "score": None})
    return {
        "session_id": sid,
        "question": next_question(state),
        "idx": state["idx"] + 1,
        "total": len(QUESTION_FLOW),
    }
@app.post("/assessment/answer", response_model=AnswerOut)
def assessment_answer(inp: AnswerIn):
    """Record one answer; either ask the next question or finish and score."""
    total = len(QUESTION_FLOW)
    # Unknown session: hand back a brand-new session ID with an error flag.
    if inp.session_id not in SESSIONS:
        return AnswerOut(session_id=new_session(), done=False, idx=0, total=total, error="Invalid session_id")
    state = SESSIONS[inp.session_id]
    if not state.get("active"):
        return AnswerOut(session_id=inp.session_id, done=False, idx=state["idx"], total=total, error="Assessment not active.")
    # Store the trimmed answer under the feature key of the current question.
    feature_key = QUESTION_FLOW[state["idx"]]["key"]
    state["answers"][feature_key] = inp.answer.strip()
    state["idx"] += 1
    pending = next_question(state)
    if pending is not None:
        return AnswerOut(session_id=inp.session_id, done=False, question=pending, idx=state["idx"] + 1, total=total)
    # Flow exhausted: close the assessment and call the external scorer.
    state["active"] = False
    result = score_via_api(cast_features(state["answers"]))
    state["scored"] = "error" not in result
    state["score"] = result
    return AnswerOut(session_id=inp.session_id, done=True, idx=total, total=total, score=result)
@app.post("/chat", response_model=ChatOut)
def chat(inp: ChatIn):
    """Free-form chat; certain trigger phrases switch into assessment mode."""
    sid = get_session(inp.session_id)
    state = SESSIONS[sid]
    lowered = inp.message.lower()
    trigger_phrases = [
        "take the entrepreneurial readiness assessment",
        "take assessment",
        "start assessment",
        "readiness assessment",
    ]
    if any(phrase in lowered for phrase in trigger_phrases):
        # Reset and activate the questionnaire, then ask the first question.
        state.update({"answers": {}, "idx": 0, "active": True, "scored": False, "score": None})
        first_question = next_question(state)
        intro = (
            "Great—let’s do a short 6-question entrepreneurial readiness check.\n\n"
            f"**Q1/6**: {first_question}"
        )
        return ChatOut(
            session_id=sid,
            reply=intro,
            assessment={"active": True, "idx": state["idx"] + 1, "total": len(QUESTION_FLOW), "question": first_question},
        )
    # No trigger phrase: answer normally with the LLM.
    return ChatOut(
        session_id=sid,
        reply=llm_reply(SYSTEM_PROMPT, inp.message),
        assessment={"active": state.get("active", False), "idx": state.get("idx", 0), "total": len(QUESTION_FLOW)},
    )