Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +110 -0
- requirements.txt +10 -0
app.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from PIL import Image
|
| 3 |
+
#importing models
|
| 4 |
+
from transformers import BlipProcessor, BlipForConditionalGeneration
|
| 5 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
import os
|
| 8 |
+
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
|
| 9 |
+
import gradio as gr
|
| 10 |
+
from diffusers import DiffusionPipeline,StableDiffusion3Pipeline
|
| 11 |
+
from huggingface_hub import login
|
| 12 |
+
|
| 13 |
+
# Load environment variables from a .env file, if one exists.
load_dotenv()

Hugging_face_token = os.getenv('huggingface_token')

# Authenticate only when a token is actually configured: login() without a
# token falls back to an interactive prompt, which cannot be answered on a
# headless host.
if Hugging_face_token:
    login(Hugging_face_token)

# Load the image-captioning model and its matching processor.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

# Set the model name for our LLMs.
GEMINI_MODEL = "gemini-2.0-flash"

# Store the API key in a variable.
GEMINI_API_KEY = os.getenv("google_api_key")
|
| 29 |
+
|
| 30 |
+
class stable_dif:
    """Select and load a Stable Diffusion pipeline by size tier.

    Supported tiers are ``'small'``, ``'medium'`` and ``'large'``.
    """

    # Size tier -> Hugging Face model repository.
    _MODEL_IDS = {
        'small': "stable-diffusion-v1-5/stable-diffusion-v1-5",
        'medium': "stabilityai/stable-diffusion-3.5-medium",
        'large': "stabilityai/stable-diffusion-3.5-large-turbo",
    }

    def __init__(self, sizes):
        """Remember the requested size tier; nothing is loaded yet."""
        self.sizes = sizes

    def model(self):
        """Load and return the pipeline for the configured size tier.

        Returns:
            The pipeline object produced by ``from_pretrained``.

        Raises:
            ValueError: If ``self.sizes`` is not one of the supported tiers.
        """
        try:
            model_id = self._MODEL_IDS[self.sizes]
        except (KeyError, TypeError):
            # Fail with a clear message instead of an unbound local variable.
            supported = ', '.join(repr(tier) for tier in self._MODEL_IDS)
            raise ValueError(
                f'Unknown model size {self.sizes!r}; expected one of {supported}.'
            ) from None

        # The 'small' tier is loaded through the generic DiffusionPipeline;
        # the 3.5 checkpoints go through StableDiffusion3Pipeline.
        if self.sizes == 'small':
            loader = DiffusionPipeline
        else:
            loader = StableDiffusion3Pipeline
        return loader.from_pretrained(model_id)
|
| 42 |
+
|
| 43 |
+
# Build the text-to-image pipeline once at module load; the story generator
# below reads the module-level `pipe`.
stable = stable_dif('small')
pipe = stable.model()
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def image_story_generator(image, requirement, style):
    """Turn an uploaded image into a four-chapter story with one picture per chapter.

    The image is captioned with the module-level BLIP ``processor``/``model``,
    the caption seeds a story written by Gemini, Gemini then turns the story
    into four image prompts, and the module-level ``pipe`` renders one image
    per prompt.

    Args:
        image: Path of the image file to start from (it is passed to
            ``Image.open``).
        requirement: Free-text requirements the story has to fit.
        style: Visual style requested for every generated image.

    Returns:
        A tuple ``(images, story)``: the generated images in prompt order
        ('prompt 1' .. 'prompt 4') and the story text with newlines replaced
        by spaces.

    Raises:
        gr.Error: If no image was provided.
    """
    if not image:
        # Surface a readable message in the UI instead of failing inside PIL.
        raise gr.Error('Please upload an image before generating a story.')

    # convert() forces the pixel data to load, so the file handle can be
    # released as soon as the block exits.
    with Image.open(image) as source:
        raw_image = source.convert('RGB')

    # Get a caption from the image.
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(**inputs, min_length=20)
    caption = processor.decode(out[0], skip_special_tokens=True)

    # One Gemini client serves both the story and the image-prompt request;
    # both calls use the same model and temperature.
    llm = ChatGoogleGenerativeAI(google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3)

    story_query = (
        f' Write a 4 chapters story based on {caption} and'
        f' that fits the following requirements: {requirement}. Give a detailed'
        ' description of the charaters appearences.'
    )
    story = llm.invoke(story_query).content.replace('\n', ' ')

    # Schemas that force the LLM answer into four named prompts.
    schemas = [
        ResponseSchema(name=f'prompt {chapter}', description='the prompt')
        for chapter in range(1, 5)
    ]
    parser = StructuredOutputParser.from_response_schemas(schemas)
    format_instructions = parser.get_format_instructions()

    prompt_query = (
        f' Based on this story: {story}. Create 4 prompts for stable diffusion that tells of a maximum of 77 tokens'
        ' what happens in each chapters. Describe the characters everytime their name is mentioned.'
        f' Each image should be created in the same exact style {style}.'
        f'\n\n{format_instructions}'
    )
    image_prompts = parser.parse(llm.invoke(prompt_query).content)

    # Walk the schema names rather than the returned keys so the images come
    # out in chapter order and any extra keys from the LLM are ignored.
    images = [pipe(image_prompts[schema.name]).images[0] for schema in schemas]

    return images, story
|
| 98 |
+
|
| 99 |
+
# gradio
|
| 100 |
+
# The hint texts are passed as label/placeholder: the first positional
# argument of gr.Textbox is the initial *value*, which would otherwise be
# submitted as if the user had typed it.
interface = gr.Interface(
    fn=image_story_generator,
    inputs=[
        gr.Image(type='filepath'),
        gr.Textbox(label='Story requirements', placeholder='enter story requirements'),
        gr.Textbox(label='Image style', placeholder='pick a style for the images'),
    ],
    outputs=[
        gr.Gallery(),
        gr.Textbox(label='story'),
    ],
    description='Upload an image to start the story generation process.'
)

interface.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# One pin per package: pip cannot install a file that pins the same package
# to two different versions.
diffusers==0.32.2
gradio==5.15.0
huggingface_hub==0.28.1
langchain==0.3.18
langchain_google_genai==2.0.9
Pillow==11.1.0
python-dotenv==1.0.1
transformers==4.48.3
|