"""Gradio chat UI for the Claude-3.7-Sonnet-Reasoning-Gemma3-12B model.

Sends conversation turns to the Hugging Face Inference API and renders the
exchange in a gr.Chatbot. Requires an ``HF_TOKEN`` secret in the Space
settings for authenticated API access.
"""

import gradio as gr
import requests
import os

# Title and description
title = "🧠 Claude-3.7-Sonnet-Reasoning-Gemma3-12B Chat"
description = """
Chat with the Claude-3.7-Sonnet-Reasoning-Gemma3-12B model using Hugging Face Inference API.
"""

# Hugging Face API setup
API_URL = "https://router.huggingface.co/models/reedmayhew/claude-3.7-sonnet-reasoning-gemma3-12B"
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}


def query_hf_api(payload):
    """Query Hugging Face Inference API.

    Args:
        payload: JSON-serializable dict with ``inputs`` and ``parameters``.

    Returns:
        The decoded JSON response (list or dict), or ``{"error": str}`` when
        the request or JSON decoding fails.
    """
    try:
        # timeout keeps a stalled endpoint from hanging the UI worker forever
        response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
        return response.json()
    except Exception as e:
        return {"error": str(e)}


def chat_with_claude(message, chat_history):
    """Chat function using HF Inference API.

    Args:
        message: The new user message.
        chat_history: List of ``{"role", "content"}`` dicts (Gradio
            ``type="messages"`` format). Mutated in place with the new turn.

    Returns:
        Tuple of ("", updated chat_history) so the textbox clears on submit.
    """
    # Build conversation context as a plain "Human:/Assistant:" transcript
    conversation = ""
    for msg in chat_history:
        if msg["role"] == "user":
            conversation += f"Human: {msg['content']}\n"
        else:
            conversation += f"Assistant: {msg['content']}\n"
    conversation += f"Human: {message}\nAssistant:"

    # Call the API
    payload = {
        "inputs": conversation,
        "parameters": {
            "max_new_tokens": 500,
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True
        }
    }

    response = query_hf_api(payload)

    if "error" in response:
        assistant_response = f"❌ API Error: {response['error']}"
    elif isinstance(response, list) and len(response) > 0:
        # Extract the generated text
        if "generated_text" in response[0]:
            full_text = response[0]["generated_text"]
            # The model echoes the prompt; keep only the text after the
            # final "Assistant:" marker
            assistant_response = full_text.split("Assistant:")[-1].strip()
        else:
            assistant_response = "❌ Unexpected response format from API"
    else:
        assistant_response = "❌ No response from API"

    # Update chat history
    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": assistant_response})

    return "", chat_history


def clear_chat():
    """Clear the chat history."""
    return []


# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)

    # Add HF Token info
    gr.Markdown("""
    **Note:** To use this chat, you need to add your Hugging Face token in the Space settings:
    1. Go to Settings → Repository secrets
    2. Add `HF_TOKEN` with your Hugging Face token
    """)

    chatbot = gr.Chatbot(
        label="Chat with Claude",
        height=500,
        type="messages"  # Fixed the deprecation warning
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Type your message here...",
            placeholder="Ask me anything...",
            lines=2,
            scale=4,
            container=False
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    with gr.Row():
        clear_btn = gr.Button("Clear Chat", variant="secondary")

    # Example questions
    gr.Examples(
        examples=[
            "Explain quantum computing in simple terms",
            "Write a Python function to calculate fibonacci sequence",
            "What are the benefits of renewable energy?",
            "How does machine learning work?"
        ],
        inputs=msg
    )

    # Event handlers
    def handle_submit(message, chat_history):
        # Ignore whitespace-only submissions; keep the history unchanged
        if not message.strip():
            return "", chat_history
        return chat_with_claude(message, chat_history)

    submit_btn.click(
        fn=handle_submit,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot]
    )

    msg.submit(
        fn=handle_submit,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot]
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=chatbot
    )

    gr.Markdown("""
    ### Tips:
    - Be specific with your questions for better responses
    - The model is good at reasoning and explanation tasks
    - Responses may take a few seconds to generate
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch()