"""FastAPI + Gradio chatbot for answering questions about Hugging Face model cards.

An MCP-backed ``Agent`` (spawning the local ``mcp_server.py`` over stdio) reads a
model card with its ``read_model_card`` tool and answers user questions about it.
The Gradio UI is mounted on the FastAPI app and served by uvicorn.
"""

import os
from typing import Literal, Optional

import gradio as gr
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import RedirectResponse
from huggingface_hub.inference._mcp.agent import Agent

load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN")
HF_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen1.5-0.5B-Chat")

app = FastAPI(title="MODEL-CARD-CHATBOT")
# BUG FIX: the CORSMiddleware keyword is `allow_headers` (plural); the original
# `allow_header=` raised a TypeError when the middleware was instantiated.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Lazily-created singleton Agent shared by all requests.
agent_instance: Optional[Agent] = None
DEFAULT_PROVIDER: Literal["hf-inference"] = "hf-inference"


async def get_agent() -> Optional[Agent]:
    """Get or create the singleton agent instance.

    Returns:
        The ready-to-use ``Agent``, or ``None`` when ``HF_TOKEN`` is missing
        or creation/tool-loading failed.
    """
    global agent_instance
    if agent_instance is None and HF_TOKEN:
        print("🔧 Creating new Agent instance ...")
        print(f"✅ HF_TOKEN present : {bool(HF_TOKEN)}")
        print(f"🤖 Model: {HF_MODEL}")
        print(f"Provider: {DEFAULT_PROVIDER}")
        try:
            agent = Agent(
                model=HF_MODEL,
                provider=DEFAULT_PROVIDER,
                api_key=HF_TOKEN,
                servers=[
                    {
                        "type": "stdio",
                        "config": {
                            "command": "python",
                            "args": ["mcp_server.py"],
                            "cwd": ".",
                            # Forward the token to the MCP subprocess.
                            "env": {"HF_TOKEN": HF_TOKEN} if HF_TOKEN else {},
                        },
                    }
                ],
            )
            print("🚀 Agent instance created successfully")
            print("🔁 loading tools ...")
            # BUG FIX: the original called `agent_instance.load_tools()` while
            # `agent_instance` was still None and never stored the freshly
            # built agent, so every call failed and the function returned None.
            await agent.load_tools()
            print("✅ Tools loaded successfully")
            agent_instance = agent
        except Exception as e:
            print(f"❌ Error creating/loading agent: {str(e)}")
    return agent_instance


@app.on_event("startup")
async def startup_event():
    """Build the agent once at application startup."""
    global agent_instance
    agent_instance = await get_agent()


async def chat_function(user_message, history, model_id):
    """Handles a user question by prompting the agent to read a model card and answer.

    Args:
        user_message (str): The user's question.
        history (list): Chat history for display.
        model_id (str): Hugging Face repo ID.

    Returns:
        Tuple[list, str]: Updated chat history and empty string for clearing input.
    """
    prompt = f"""You're an assistant helping with hugging face model cards. First, run the tool `read_model_card` on repo_id `{model_id}` to get the model card. 
Then answer this user question based on the model card: User question: {user_message}"""
    history = history + [(user_message, None)]
    # Robustness: the agent may be unavailable (e.g. HF_TOKEN unset or startup
    # failure); fail with a readable message instead of an AttributeError.
    agent = agent_instance or await get_agent()
    if agent is None:
        history[-1] = (user_message, "⚠️ Error: agent is not available (is HF_TOKEN set?)")
        return history, ""
    try:
        response = ""
        # BUG FIX: Agent.run is an async generator, so it must be consumed
        # with `async for` (the original sync `for` raised a TypeError).
        # Gradio supports async event handlers, hence this coroutine.
        async for output in agent.run(prompt):
            if hasattr(output, "content") and output.content:
                response = output.content
        final_response = response or "⚠️ Sorry, I couldn't generate a response."
        history[-1] = (user_message, final_response)
    except Exception as e:
        history[-1] = (user_message, f"⚠️ Error: {str(e)}")
    return history, ""


def create_gradio_app():
    """Build the Gradio Blocks UI wired to :func:`chat_function`."""
    with gr.Blocks(title="Model Card Chatbot") as demo:
        gr.Markdown("## 🤖 Model Card Chatbot\nAsk questions about Hugging Face model card")
        with gr.Row():
            model_id = gr.Textbox(label="MODEL ID", value="google/gemma-2-2b")
            user_input = gr.Textbox(label="Your Question", value="Ask something about the model card .....")
        chat = gr.Chatbot(label="chat")
        send = gr.Button("Ask")
        send.click(fn=chat_function, inputs=[user_input, chat, model_id], outputs=[chat, user_input])
    return demo


gradio_app = create_gradio_app()
# BUG FIX: mounting Gradio at "/" made the `@app.get("/")` route below
# unreachable, and its RedirectResponse("/") would have redirected to itself
# forever. Mount the UI under /gradio and redirect the root to it instead.
app = gr.mount_gradio_app(app, gradio_app, path="/gradio")


@app.get("/")
async def root():
    """Redirect the bare root URL to the Gradio UI."""
    return RedirectResponse("/gradio")


if __name__ == "__main__":
    # NOTE(review): "app:app" assumes this file is named app.py — confirm.
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)