# Qwen_Base_Model_1.7b_GGUF / api_client.py
# Author: Yash030
# Commit: "Deploy app with runtime model download" (3f7e602)
from gradio_client import Client
# URL of your Hugging Face Space.
# Replace this with your actual Space URL, e.g., "https://huggingface.co/spaces/YourUsername/SpaceName"
# Or the direct URL like "https://yourusername-spacename.hf.space"
# NOTE: a bare repo id such as "username/space_name" is also accepted by gradio_client.
SPACE_URL = "YOUR_SPACE_URL_HERE"
def main():
    """Connect to the Hugging Face Space at SPACE_URL and stream one chat turn.

    Performs network I/O only: prints the connection status, sends a single
    message to the Space's ChatInterface endpoint, and prints the reply to
    stdout. Returns None. Prints an error and returns early if the connection
    fails (e.g. SPACE_URL was never filled in).
    """
    print(f"Connecting to {SPACE_URL}...")
    try:
        client = Client(SPACE_URL)
    except Exception as e:
        print(f"Error connecting: {e}")
        print("Make sure you have replaced 'YOUR_SPACE_URL_HERE' with your actual Space URL or Name (e.g., 'username/repo_name').")
        return

    print("Sending message...")
    # The Space is built with gr.ChatInterface(fn=predict), so the endpoint
    # expects predict's arguments (message, history) and is registered under
    # the "/chat" api_name by default.
    chat_message = "What is the capital of France?"

    # submit() returns a Job; iterating it yields the streamed partial outputs
    # produced by the Space's generator-based predict function.
    job = client.submit(
        chat_message,       # message
        [],                 # history (empty list for a new chat)
        api_name="/chat",   # default api_name for ChatInterface
    )

    print(f"User: {chat_message}")
    print("AI: ", end="", flush=True)
    final_output = ""
    for token in job:
        # Each yield carries the full accumulated text so far (not a delta),
        # so keep only the latest value and print it once streaming ends.
        final_output = token
    print(final_output)
# Script entry point: run the demo chat only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    main()