Spaces:
Sleeping
Sleeping
| import os | |
| from huggingface_hub import login | |
| import gradio as gr | |
| from PIL import Image | |
| from transformers import AutoProcessor, AutoModelForImageTextToText | |
| import torch | |
| import spaces | |
| from peft import PeftModel | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
# Collect the images attached to a chat-style message list.
def process_vision_info(messages: "list[dict]") -> "list[Image.Image]":
    """Extract every image element from a list of chat messages.

    Args:
        messages: Chat messages; each message's "content" may be a single
            element or a list of elements. An element counts as an image
            when it is a dict carrying an "image" key or type == "image".

    Returns:
        The images, each converted to RGB mode, in encounter order.
    """
    image_inputs = []
    for msg in messages:
        content = msg.get("content", [])
        # Normalize single-element content to a list so one loop handles both.
        if not isinstance(content, list):
            content = [content]
        for element in content:
            if not isinstance(element, dict):
                continue  # plain-text or other scalar content entries
            if "image" in element or element.get("type") == "image":
                image = element.get("image", element)
                # Skip malformed entries (type == "image" but no actual image
                # payload) instead of crashing on dict.convert("RGB").
                if hasattr(image, "convert"):
                    image_inputs.append(image.convert("RGB"))
    return image_inputs
# Load the tarot-card vision model and its processor; kept on CPU until use.
def load_image_model():
    """Download the fine-tuned Gemma3 tarot model and return (processor, model)."""
    repo_id = "JoannaKOKO/Gemma3-4b_tarot"
    processor = AutoProcessor.from_pretrained(repo_id)
    model = AutoModelForImageTextToText.from_pretrained(
        repo_id,
        device_map="cpu",
        torch_dtype=torch.bfloat16,
        attn_implementation="eager",
    )
    return processor, model
# Load the interpretation LLM (Qwen 2.5 3B Instruct) on CPU.
def load_text_model():
    """Return (model, tokenizer) for the text-interpretation stage."""
    repo_id = "Qwen/Qwen2.5-3B-Instruct"
    model = AutoModelForCausalLM.from_pretrained(repo_id)
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    return model, tokenizer
# Identify the tarot card shown in an uploaded image.
def generate_description(sample, model, processor):
    """Ask the vision model to name the tarot card in sample["image"].

    Args:
        sample: Dict with "image" (a PIL image) and "prompt" (instruction text).
        model: Vision-language model returned by load_image_model().
        processor: The matching processor (provides chat template + tokenizer).

    Returns:
        The model's text answer (card name and upright/reversed position).
    """
    # Fall back to CPU when no GPU is present so the app still runs;
    # the previous hard-coded "cuda" crashed on CPU-only hosts even though
    # the model itself is loaded with device_map="cpu".
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    system_message = 'You are a Tarot Card Identifier providing the card names and whether they are in upright or reversed position.'
    messages = [
        {"role": "system", "content": [{"type": "text", "text": system_message}]},
        {"role": "user", "content": [
            {"type": "image", "image": sample["image"]},
            {"type": "text", "text": sample["prompt"]},
        ]},
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs = process_vision_info(messages)
    inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to(device)
    # Stop on either the generic EOS or Gemma's explicit end-of-turn marker.
    stop_token_ids = [processor.tokenizer.eos_token_id, processor.tokenizer.convert_tokens_to_ids("<end_of_turn>")]
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        top_p=1.0,
        do_sample=True,
        temperature=0.8,
        eos_token_id=stop_token_ids,
        disable_compile=True,
    )
    # Strip the prompt tokens so only the newly generated answer is decoded.
    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    # Move trimmed IDs to CPU before decoding.
    generated_ids_trimmed_cpu = [ids.cpu() for ids in generated_ids_trimmed]
    output_text = processor.batch_decode(
        generated_ids_trimmed_cpu,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return output_text[0]
# Produce the full three-card reading from the identified card names.
def generate_interpretation(question, cards, model, tokenizer):
    """Generate a tarot interpretation for the user's question.

    Args:
        question: The user's free-text question.
        cards: Three card descriptions in order (reason, result, recommendation).
        model: Causal LM returned by load_text_model().
        tokenizer: The matching tokenizer.

    Returns:
        The model's interpretation text.
    """
    # Fall back to CPU when no GPU is present so the app still runs;
    # the previous hard-coded "cuda" crashed on CPU-only hosts.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    prompt = f"""Analyze this tarot reading for the question: {question}
Cards:
1. Reason: {cards[0]}
2. Result: {cards[1]}
3. Recommendation: {cards[2]}
Provide a professional interpretation covering:
- Individual card meanings in their positions
- Combined message and symbolism
- Practical advice
- Potential outcomes"""
    messages = [
        # Typo fix: "provideing" -> "providing" in the system prompt.
        {"role": "system", "content": "You are a Tarot Card Explainer providing relevant suggestions based on tarot card name"},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)
    generated_ids = model.generate(**model_inputs, max_new_tokens=512)
    # Strip the prompt tokens so only the newly generated answer is decoded.
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response
def main():
    """
    Main function to set up and launch the Gradio tarot reading application.
    Handles authentication, model loading, and interface creation.

    Raises:
        ValueError: If HF_TOKEN is not set in the environment.
    """
    # Authenticate with Hugging Face (required for the gated/private models).
    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found in environment variables!")
    login(token=hf_token)
    # Load both models on CPU once, at startup; they are moved to GPU per call.
    image_processor, image_model = load_image_model()
    text_model, text_tokenizer = load_text_model()

    def process_tarot(question, reason_img, result_img, recommendation_img):
        """
        Process the user's question and tarot card images to generate a reading.
        Uses loaded models for card identification and interpretation.
        Returns a Markdown string (either the reading or an error message).
        """
        try:
            # Validate image uploads before doing any model work.
            if any(img is None for img in [reason_img, result_img, recommendation_img]):
                return "Please upload all three cards!"
            # Identify each card with the vision model.
            cards = []
            for img in [reason_img, result_img, recommendation_img]:
                sample = {
                    "prompt": "Please tell me the name of the tarot card in this image, specify 'reversed' if it is. ",
                    "image": img.convert("RGB")
                }
                card = generate_description(sample, image_model, image_processor)
                cards.append(card)
            output = "### Identifying Card Name...\n"
            # Generate the full interpretation from the identified cards.
            interpretation = generate_interpretation(question, cards, text_model, text_tokenizer)
            card_cat = ['Reason Card', 'Result Card', 'Recommendation Card']
            # Format the Markdown output.
            output += "### Card Analysis\n"
            for i, card in enumerate(cards, 1):
                output += f"**{card_cat[i-1]}:** {card}\n\n"
            output += "### Full Interpretation\n"
            output += interpretation
            output += "\n\n**Reading Ends.**"
            return output
        except Exception as e:
            # Surface the failure in the UI rather than crashing the app.
            return f"Error in reading: {str(e)}"

    # Set up the Gradio interface.
    with gr.Blocks() as demo:
        # Typo fixes in the user-facing headers: "Acrane" -> "Arcane",
        # "Artifical" -> "Artificial".
        gr.Markdown("# 🔮 Arcane Intelligence (A.I.)")
        gr.Markdown("### Artificial Intelligence Supported Tarot Reading Application")
        question = gr.Textbox(
            label="Your Question",
            placeholder="Enter your question for the cards...",
            lines=3
        )
        with gr.Row():
            reason_img = gr.Image(label="Reason Card", type="pil")
            result_img = gr.Image(label="Result Card", type="pil")
            recommendation_img = gr.Image(label="Recommendation Card", type="pil")
        submit_btn = gr.Button("Perform Reading")
        output = gr.Markdown()
        # First show a progress message, then replace it with the reading.
        submit_btn.click(
            fn=lambda: "Reading in progress...",  # Show progress message
            inputs=None,
            outputs=output
        ).then(
            fn=process_tarot,  # Run the tarot reading
            inputs=[question, reason_img, result_img, recommendation_img],  # Pass all inputs
            outputs=output  # Update the same output with the result
        )
    # Launch the application.
    demo.launch()
# Entry point of the script: run only when executed directly, not on import.
if __name__ == "__main__":
    main()