Hugging Face Space (status: Sleeping) — source listing below.
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Serverless Inference API client, authenticated with the HF_TOKEN
# value stored in the Space's Secrets.
client = InferenceClient(token=os.getenv("HF_TOKEN"))

# Adapter repository id. The Inference API detects that this is an
# adapter and loads its base model automatically.
MODEL_ID = "shri171981/medical_chat_generative"
def ask_api(message, history):
    """Chat callback for gr.ChatInterface.

    Sends the user's message to the hosted model through the serverless
    Inference API and returns the generated answer.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list
        Prior turns supplied by Gradio. Unused: the template below is
        single-turn, so no conversation context is forwarded.

    Returns
    -------
    str
        The model's reply, or a human-readable error string.
    """
    # NOTE(review): despite the original "Strict Llama-3 format" comment,
    # this is the Alpaca instruction template
    # (### Instruction / ### Input / ### Response).
    system_prompt = (
        "You are a helpful and empathetic medical doctor. "
        "Answer the patient's question based on the input provided."
    )
    prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{system_prompt}
### Input:
{message}
### Response:
"""
    try:
        # Serverless text-generation call; may fail with a 503 while the
        # model container spins up (cold start).
        response = client.text_generation(
            prompt,
            model=MODEL_ID,
            max_new_tokens=128,
            temperature=0.7,
            return_full_text=False,  # only the completion, not the prompt
        )
        # Strip whitespace the trailing-newline template tends to induce.
        return response.strip()
    except Exception as e:  # broad on purpose: this is the UI boundary
        # Cold start: match both the API's "Model is loading" message and
        # a bare 503 status in the error text.
        if "Model is loading" in str(e) or "503" in str(e):
            return (
                "⚠️ The model is waking up (Cold Start). "
                "Please wait 30 seconds and try again!"
            )
        return f"Error: {str(e)}"
# Example prompts shown under the chat box.
_EXAMPLES = ["I have a sharp pain in my chest.", "What is good for a fever?"]

# Wire the API-backed callback into a Gradio chat UI.
demo = gr.ChatInterface(
    ask_api,
    title="🚑 HACK_DOC (API Powered)",
    description="Running on Hugging Face Serverless GPU via API.",
    examples=_EXAMPLES,
)

if __name__ == "__main__":
    demo.launch()