Hugging Face Space (status: Sleeping) — source listing below.
import os

import gradio as gr
from huggingface_hub import InferenceClient

# Serverless Inference API client, authenticated with the HF_TOKEN
# value stored in the Space's Secrets.
client = InferenceClient(token=os.getenv("HF_TOKEN"))

# Adapter repository id. The Inference API detects that this is an
# adapter and loads its base model automatically.
MODEL_ID = "shri171981/medical_chat_generative"
def ask_api(message, history):
    """Chat callback for gr.ChatInterface.

    Sends the user's message to the hosted model through the serverless
    Inference API and returns the generated answer.

    Parameters
    ----------
    message : str
        The user's latest chat message.
    history : list
        Prior turns supplied by Gradio. Unused: the template below is
        single-turn, so no conversation context is forwarded.

    Returns
    -------
    str
        The model's reply, or a human-readable error string.
    """
    # NOTE(review): despite the original "Strict Llama-3 format" comment,
    # this is the Alpaca instruction template
    # (### Instruction / ### Input / ### Response).
    system_prompt = (
        "You are a helpful and empathetic medical doctor. "
        "Answer the patient's question based on the input provided."
    )
    prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{system_prompt}
### Input:
{message}
### Response:
"""
    try:
        # Serverless text-generation call; may fail with a 503 while the
        # model container spins up (cold start).
        response = client.text_generation(
            prompt,
            model=MODEL_ID,
            max_new_tokens=128,
            temperature=0.7,
            return_full_text=False,  # only the completion, not the prompt
        )
        # Strip whitespace the trailing-newline template tends to induce.
        return response.strip()
    except Exception as e:  # broad on purpose: this is the UI boundary
        # Cold start: match both the API's "Model is loading" message and
        # a bare 503 status in the error text.
        if "Model is loading" in str(e) or "503" in str(e):
            return (
                "⚠️ The model is waking up (Cold Start). "
                "Please wait 30 seconds and try again!"
            )
        return f"Error: {str(e)}"
# Example prompts shown under the chat box.
_EXAMPLES = ["I have a sharp pain in my chest.", "What is good for a fever?"]

# Wire the API-backed callback into a Gradio chat UI.
demo = gr.ChatInterface(
    ask_api,
    title="🚑 HACK_DOC (API Powered)",
    description="Running on Hugging Face Serverless GPU via API.",
    examples=_EXAMPLES,
)

if __name__ == "__main__":
    demo.launch()