Spaces:
Sleeping
Sleeping
| import os | |
| from fastapi import FastAPI | |
| from dotenv import load_dotenv | |
| from huggingface_hub.inference._mcp.agent import Agent | |
| import gradio as gr | |
| import uvicorn | |
| from fastapi.responses import RedirectResponse | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from typing import Optional, Literal | |
# Let python-dotenv populate the environment before the lookups below.
load_dotenv()

# Credentials and model selection come from the environment; only the model
# has a fallback value.
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_MODEL = os.environ.get("HF_MODEL", "Qwen/Qwen1.5-0.5B-Chat")

# FastAPI application with CORS opened to every origin, method and header.
app = FastAPI(title="MODEL-CARD-CHATBOT")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Process-wide Agent singleton; it stays None until get_agent() succeeds.
agent_instance: Optional[Agent] = None
DEFAULT_PROVIDER: Literal["hf-inference"] = "hf-inference"
async def get_agent():
    """Return the shared ``Agent``, creating it on first successful call.

    The agent is stored in the module-level ``agent_instance`` so later calls
    reuse it. Creation is attempted only when ``HF_TOKEN`` is set; the agent is
    configured with ``HF_MODEL`` and ``DEFAULT_PROVIDER`` and a stdio MCP
    server started with ``python mcp_server.py``.

    Returns:
        The cached ``Agent``, or ``None`` when ``HF_TOKEN`` is missing or when
        creating the agent / loading its tools raised an exception (the error
        is printed, not propagated).
    """
    global agent_instance

    if agent_instance is not None:
        return agent_instance

    if not HF_TOKEN:
        # Make the "nothing happened" case visible instead of silently
        # returning None.
        print("⚠️ HF_TOKEN is not set; skipping Agent creation")
        return agent_instance

    print("🔧 Creating new Agent instance ...")
    print(f"✅ HF_TOKEN present : {bool(HF_TOKEN)}")
    print(f"🤖 Model: {HF_MODEL}")
    print(f"Provider: {DEFAULT_PROVIDER}")
    try:
        agent = Agent(
            model=HF_MODEL,
            # Use the shared constant so the logged provider and the one
            # actually passed to the Agent cannot drift apart.
            provider=DEFAULT_PROVIDER,
            api_key=HF_TOKEN,
            servers=[{
                "type": "stdio",
                "config": {
                    "command": "python",
                    "args": ["mcp_server.py"],
                    "cwd": ".",
                    # HF_TOKEN is known to be truthy on this path.
                    "env": {"HF_TOKEN": HF_TOKEN},
                },
            }],
        )
        print("🚀 Agent instance created successfully")
        print("🔁 loading tools ...")
        await agent.load_tools()
        # Publish the agent only after its tools loaded, so a half-initialised
        # instance is never cached.
        agent_instance = agent
        print("✅ Tools loaded successfully")
    except Exception as e:
        # Best-effort by design: callers receive None and may retry later.
        print(f"❌ Error creating/loading agent: {str(e)}")
    return agent_instance
# Register with FastAPI so the agent is actually warmed up when the server
# starts; without a registration this coroutine is never invoked.
@app.on_event("startup")
async def startup_event():
    """Create the shared agent at application startup.

    Delegates to ``get_agent()`` and stores its result (possibly ``None``) in
    the module-level ``agent_instance``.
    """
    global agent_instance
    agent_instance = await get_agent()
async def chat_function(user_message, history, model_id):
    """Answer a question about a model card and append it to the chat history.

    Args:
        user_message: The question typed by the user.
        history: Existing chat history as a list of ``(user, bot)`` pairs;
            ``None`` is treated as an empty history.
        model_id: Hub repo id interpolated into the prompt for the
            ``read_model_card`` tool.

    Returns:
        A ``(history, "")`` tuple: the updated history and an empty string
        (wired to the question textbox by the UI, which clears it).
        Failures are reported as a chat message rather than raised.
    """
    prompt = f"""You're an assistant helping with hugging face model cards.
First, run the tool `read_model_card` on repo_id `{model_id}` to get the model card.
Then answer this user question based on the model card:
User question: {user_message}"""

    # Copy so the caller's list is never mutated; tolerate a missing history.
    history = list(history or []) + [(user_message, None)]

    # Obtain the agent lazily so the chat works even if nothing created it
    # earlier; get_agent() returns the cached instance when one exists.
    agent = await get_agent()
    if agent is None:
        history[-1] = (
            user_message,
            "⚠️ Error: the agent is not available. Check that HF_TOKEN is set.",
        )
        return history, ""

    try:
        streamed_parts = []
        last_message_content = ""
        async for output in agent.run(prompt):
            # Streaming chunks carry assistant text in choices[0].delta.content;
            # other items (e.g. tool results) expose a plain .content.
            choices = getattr(output, "choices", None)
            if choices:
                delta = getattr(choices[0], "delta", None)
                piece = getattr(delta, "content", None)
                if piece:
                    streamed_parts.append(piece)
            elif getattr(output, "content", None):
                last_message_content = output.content

        # Prefer the assistant's streamed answer; fall back to the last plain
        # message content, which is what was shown previously.
        response = "".join(streamed_parts) or last_message_content
        final_response = response or "⚠️ Sorry, I couldn't generate a response."
        history[-1] = (user_message, final_response)
    except Exception as e:
        # Surface the failure in the chat instead of breaking the UI callback.
        history[-1] = (user_message, f"⚠️ Error: {str(e)}")
    return history, ""
def create_gradio_app():
    """Build the Gradio Blocks UI for the model-card chatbot.

    The layout has a heading, a row with the model id, the question box and
    the "Ask" button, and a chat panel. Both the button and pressing Enter in
    the question box call ``chat_function`` with
    ``(question, chat history, model id)`` and write its results back to the
    chat panel and the question box.

    Returns:
        The constructed ``gr.Blocks`` instance.
    """
    with gr.Blocks(title="Model Card Chatbot") as demo:
        gr.Markdown("## 🤖 Model Card Chatbot\nAsk questions about Hugging Face model card")
        with gr.Row():
            model_id = gr.Textbox(label="MODEL ID", value="google/gemma-2-2b")
            # Show the hint as a placeholder: as a real value it would be
            # submitted as the question unless the user deleted it first.
            user_input = gr.Textbox(
                label="Your Question",
                placeholder="Ask something about the model card .....",
            )
            send = gr.Button("Ask")
        chatbot = gr.Chatbot(label="chat")

        handler_io = {
            "fn": chat_function,
            "inputs": [user_input, chatbot, model_id],
            "outputs": [chatbot, user_input],
        }
        send.click(**handler_io)
        # Let Enter in the question box behave like the button.
        user_input.submit(**handler_io)
    return demo
# Build the UI once at import time, then attach it to the FastAPI app at "/".
# `app` is rebound to whatever mount_gradio_app returns.
gradio_app = create_gradio_app()
app = gr.mount_gradio_app(
    app,
    gradio_app,
    path="/",
)
async def root():
    """Return a redirect response pointing at "/".

    NOTE(review): no route decorator is visible on this function, and the
    Gradio UI is mounted at "/", so this handler looks unused — confirm.
    """
    redirect = RedirectResponse("/")
    return redirect
if __name__ == "__main__":
    # Run this module's `app` with uvicorn on all interfaces, port 7860,
    # with auto-reload enabled. The "app:app" import string assumes the
    # module is importable as `app`.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=7860,
        reload=True,
    )