import os import requests import tempfile from pathlib import Path import secrets from PIL import Image import gradio as gr # Set your Hugging Face API token HUGGING_FACE_API_KEY = os.getenv("HUGGING_FACE_API_KEY") if not HUGGING_FACE_API_KEY: raise ValueError("Please set the Hugging Face API key in the environment as 'HUGGING_FACE_API_KEY'.") math_messages = [] # Function to process the image with Hugging Face API def process_image(image, shouldConvert=False): global math_messages math_messages = [] # Reset messages when a new image is uploaded uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(Path(tempfile.gettempdir()) / "gradio") os.makedirs(uploaded_file_dir, exist_ok=True) name = f"tmp{secrets.token_hex(20)}.jpg" filename = os.path.join(uploaded_file_dir, name) # Save the uploaded image if shouldConvert: new_img = Image.new('RGB', (image.width, image.height), (255, 255, 255)) new_img.paste(image, (0, 0), mask=image) image = new_img image.save(filename) # Use Hugging Face API for image captioning with open(filename, "rb") as img_file: response = requests.post( "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-base", headers={"Authorization": f"Bearer {HUGGING_FACE_API_KEY}"}, files={"file": img_file} ) os.remove(filename) # Clean up temp file # Check if response is successful and handle errors if response.status_code == 200: caption = response.json().get("generated_text", "No description available.") else: caption = f"Error: {response.status_code} - {response.json().get('error', 'Unknown error')}" return caption # Function for getting math responses from Hugging Face's text generation API def get_math_response(image_description, user_question): global math_messages if not math_messages: math_messages.append({"role": "system", "content": "You are a helpful math assistant."}) # Prepare the query content content = f"Image description: {image_description}\n\n" if image_description else "" query = f"{content}User question: {user_question}" math_messages.append({"role": "user", "content": query}) # Make the text generation call payload = { "inputs": query, "parameters": {"max_length": 100, "temperature": 0.7}, } response = requests.post( "https://api-inference.huggingface.co/models/gpt2", headers={"Authorization": f"Bearer {HUGGING_FACE_API_KEY}"}, json=payload ) # Check if response is successful and handle errors if response.status_code == 200: answer = response.json().get("generated_text", "Sorry, I couldn't generate a response.") else: answer = f"Error: {response.status_code} - {response.json().get('error', 'Unknown error')}" yield answer math_messages.append({"role": "assistant", "content": answer}) def math_chat_bot(image, sketchpad, question, state): current_tab_index = state["tab_index"] image_description = None # Check for uploaded image if current_tab_index == 0 and image: image_description = process_image(image) elif current_tab_index == 1 and sketchpad and sketchpad["composite"]: image_description = process_image(sketchpad["composite"], True) # Get response from the text generation API yield from get_math_response(image_description, question) css = """ #qwen-md .katex-display { display: inline; } #qwen-md .katex-display>.katex { display: inline; } #qwen-md .katex-display>.katex>.katex-html { display: inline; } """ def tabs_select(e: gr.SelectData, _state): _state["tab_index"] = e.index # Create Gradio interface with gr.Blocks(css=css) as demo: gr.HTML("""