Spaces:
Runtime error
Runtime error
| import torch | |
| from diffusers import DiffusionPipeline | |
| import gradio as gr | |
| import numpy as np | |
| import openai | |
| import os | |
| import spaces | |
| import base64 | |
# Setup logging (disabled — kept for quick re-enabling while debugging)
# logging.basicConfig(level=logging.DEBUG)
# logger = logging.getLogger(__name__)

# Retrieve the OpenAI API key from the environment.
# NOTE(review): the env var is 'OPEN_AI_API_KEY', not the conventional
# 'OPENAI_API_KEY' — confirm it matches the Space's configured secret name.
API_KEY = os.getenv('OPEN_AI_API_KEY')

# HTML banner rendered at the top of the image-generation column in the UI.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Book-Reader</h1>
<p style="text-align: center;">This contains a Stable Diffusor from <a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"><b>stabilityai/stable-diffusion-xl-base-1.0</b></a></p>
<p style="text-align: center;">For Instructions on how to use the models <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
</div>
'''
# Load the SDXL base and refiner pipelines once at module import.
# The refiner reuses the base's second text encoder and VAE, the documented
# ensemble-of-experts setup that also avoids loading those weights twice.
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
).to("cuda:0")
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    torch_dtype=torch.float16,
    # BUG FIX: was `use_safetensor=True` (singular) — not a recognized
    # from_pretrained argument, so safetensors loading was never explicitly
    # requested for the refiner. The correct keyword (as used for `base`
    # above) is plural.
    use_safetensors=True,
    variant="fp16",
).to("cuda:0")

# Mutable module-level state recording which chat path last ran; written by
# the chat handlers below.
chat_mode = {}
def encode_image(image_path):
    """Read the file at *image_path* and return its bytes base64-encoded as str.

    Used to inline an uploaded image into an OpenAI vision request as a
    ``data:image/jpeg;base64,...`` URL payload.
    """
    # FIX: the original also set chat_mode["the_mode"] = "diffusing" here — a
    # stray global side effect whose key ("the_mode") is never read anywhere
    # in this file, and whose value contradicts the real mode keys used
    # elsewhere ("mode": "text"/"image"). Removed as dead, misleading state.
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
def generation(message, history):
    """Create a streaming OpenAI chat completion for *message*.

    Parameters
    ----------
    message : str | dict
        Either a plain prompt string or a Gradio multimodal dict with
        ``"text"`` and ``"files"`` keys.
    history : list
        Gradio chat history of (user, bot) pairs; a tuple user entry marks a
        previously uploaded file.

    Returns the OpenAI streaming iterator. Routes to GPT-3.5-turbo when no
    image is in play, and to GPT-4o with the image inlined as a base64 data
    URL otherwise. Side effect: records the chosen path in `chat_mode`.
    """
    global chat_mode

    # Resolve the image to ground on, if any: prefer one attached to the
    # current message, else fall back to the most recent one in history.
    image_path = None
    # FIX: guard with isinstance — the original did `"files" in message`,
    # which on a plain-string message is a substring test, not a key test.
    if isinstance(message, dict) and message.get("files"):
        last_file = message["files"][-1]
        image_path = last_file["path"] if isinstance(last_file, dict) else last_file
    else:
        for hist in history:
            if isinstance(hist[0], tuple):
                image_path = hist[0][0]  # last match wins, matching original behavior

    input_prompt = message if isinstance(message, str) else message.get("text", "")
    client = openai.OpenAI(api_key=API_KEY)  # hoisted: both branches need it

    if image_path is None:
        # Text-only conversation.
        chat_mode["mode"] = "text"
        return client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
                {"role": "user", "content": input_prompt},
            ],
            stream=True,
        )

    # Vision conversation: embed the image as a base64 data URL.
    chat_mode["mode"] = "image"
    base64_image = encode_image(image_path=image_path)
    return client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant called 'chimera'."},
            {"role": "user", "content": [
                {"type": "text", "text": input_prompt},
                {"type": "image_url", "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}"
                }},
            ]},
        ],
        stream=True,
    )
def diffusing(prompt: str,
              n_steps: int,
              denoising: float):
    """Generate one image from *prompt* with the SDXL base + refiner pair.

    The base pipeline denoises from the start up to the *denoising* fraction
    and emits latents; the refiner picks up from that same fraction and
    finishes the remaining steps. Returns the first refined image.
    """
    # Stage 1: base model produces intermediate latents (no decode yet).
    latents = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=denoising,
        output_type="latent",
    ).images
    # Stage 2: refiner completes denoising starting where the base stopped.
    refined = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_start=denoising,
        image=latents,
    ).images
    return refined[0]
def check_cuda_availability():
    """Return a short human-readable description of the active CUDA device."""
    if not torch.cuda.is_available():
        return "No CUDA device found."
    return f"GPU: {torch.cuda.get_device_name(0)}"
# Image created from diffusing.
# NOTE(review): never written or read in this file — kept only in case an
# external module references it; candidate for removal.
image_created = {}

def bot_comms(message, history):
    """Gradio ChatInterface callback: route *message* and stream the reply.

    Yields the accumulated assistant text after each streamed chunk so the
    UI updates incrementally.
    """
    # Gradio's multimodal textbox passes a dict; normalize plain strings.
    if not isinstance(message, dict):
        message = {"text": message}

    # Diagnostic escape hatch typed directly into the chat box.
    # FIX: .get() — the original indexed message["text"], which raises
    # KeyError on a files-only multimodal message.
    if message.get("text") == "check cuda":
        yield check_cuda_availability()
        return

    # FIX: the original kept two parallel accumulators (`buffer` and
    # `gpt_outputs`) holding identical text; `buffer` was never used.
    chunks = []
    stream = generation(message, history)
    for chunk in stream:
        text = chunk.choices[0].delta.content
        if text:  # skip None/empty deltas (original's double check collapsed)
            chunks.append(text)
            yield "".join(chunks)
# Shared multimodal input: free text plus image uploads.
# NOTE(review): gradio file_types normally takes "image" (singular) or
# extension strings — confirm "images" actually restricts the file picker.
chat_input = gr.MultimodalTextbox(interactive=True, file_types=["images"], placeholder="Enter your question or upload an image.", show_label=False)

with gr.Blocks(fill_height=True) as demo:
    with gr.Row():
        # Left column: Stable Diffusion XL image generation.
        with gr.Column():
            gr.Markdown(DESCRIPTION)
            image_prompt = gr.Textbox(label="Image Prompt")
            output_image = gr.Image(label="Generated Image")
            generate_image_button = gr.Button("Generate Image")
            # Advanced generation knobs, collapsed by default.
            with gr.Accordion(label="⚙️ Parameters", open=False):
                steps_slider = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=1,
                    value=40,
                    label="Number of Inference Steps"
                )
                denoising_slider = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.1,
                    value=0.8,
                    label="High Noise Fraction"
                )
            # Wire the button to diffusing(prompt, n_steps, denoising).
            generate_image_button.click(
                fn=diffusing,
                inputs=[image_prompt, steps_slider, denoising_slider],
                outputs=output_image
            )
        # Right column: GPT-3.5 / GPT-4o chat (text + vision).
        with gr.Column():
            gr.Markdown('''
            <div>
            <h1 style="text-align: center;">Smart Reader</h1>
            <p style="text-align: center;">This contains a Generative LLM from <a href="https://openai.com/"><b>Open AI</b></a> called GPT-3.5-Turbo and Vision.</p>
            <p style="text-align: center;">For Instructions on how to use the models <a href="https://huggingface.co/spaces/sandz7/chimera/blob/main/README.md"><b>view this</b></a></p>
            </div>
            ''')
            chat = gr.ChatInterface(fn=bot_comms,
                                    multimodal=True,
                                    textbox=chat_input)

demo.launch()