# Source: Hugging Face Space by shri171981 (commit 4df8947, "fix: updated model source")
import gradio as gr
from huggingface_hub import InferenceClient
import os
# 1. Inference API client, authenticated with the HF_TOKEN secret
# configured in the Space settings.
hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(token=hf_token)

# 2. The fine-tuned adapter to query. The Inference API recognizes
# adapter repos and loads the matching base model automatically.
MODEL_ID = "shri171981/medical_chat_generative"
def ask_api(message, history):
    """Answer one chat turn by querying the fine-tuned model over the HF Inference API.

    Args:
        message: The user's question (str).
        history: Prior chat turns supplied by gr.ChatInterface. Unused —
            every request is answered statelessly from `message` alone.

    Returns:
        The model's generated reply (str), or a human-readable error string.
    """
    # 3. Format the prompt in the Alpaca instruction layout the adapter was
    # fine-tuned on; deviating from this template degrades answer quality.
    system_prompt = (
        "You are a helpful and empathetic medical doctor. "
        "Answer the patient's question based on the input provided."
    )
    # NOTE: the template lines stay at column 0 on purpose — indenting them
    # would inject leading spaces into the prompt itself.
    prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{system_prompt}
### Input:
{message}
### Response:
"""
    try:
        # 4. Single-shot generation; return_full_text=False strips the echoed
        # prompt so only the newly generated completion comes back.
        response = client.text_generation(
            prompt,
            model=MODEL_ID,
            max_new_tokens=128,
            temperature=0.7,
            return_full_text=False,
        )
        # Models often emit leading whitespace/newlines after "### Response:".
        return response.strip()
    except Exception as e:
        # 5. Cold start: a sleeping model answers HTTP 503 with a payload like
        # "Model <id> is currently loading". Match loosely ("loading"/"503")
        # because the exact wording varies across huggingface_hub versions —
        # an exact match on "Model is loading" would almost never fire.
        err = str(e)
        if "loading" in err.lower() or "503" in err:
            return "⚠️ The model is waking up (Cold Start). Please wait 30 seconds and try again!"
        return f"Error: {err}"
# 6. Build the chat UI: each user turn is passed to ask_api as
# (message, history) and its return value is shown as the bot reply.
demo = gr.ChatInterface(
    fn=ask_api,
    title="🚑 HACK_DOC (API Powered)",
    description="Running on Hugging Face Serverless GPU via API.",
    # Clickable starter prompts displayed beneath the input box.
    examples=["I have a sharp pain in my chest.", "What is good for a fever?"],
)

# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()