wolf1997 committed on
Commit
d082507
·
verified ·
1 Parent(s): 0fca85e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +110 -0
  2. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from PIL import Image
3
+ #importing models
4
+ from transformers import BlipProcessor, BlipForConditionalGeneration
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from dotenv import load_dotenv
7
+ import os
8
+ from langchain.output_parsers import StructuredOutputParser, ResponseSchema
9
+ import gradio as gr
10
+ from diffusers import DiffusionPipeline,StableDiffusion3Pipeline
11
+ from huggingface_hub import login
12
+
13
# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

Hugging_face_token = os.getenv('huggingface_token')

# login(None) falls back to an interactive token prompt, which cannot be
# answered on a headless host, so authenticate only when a token is configured.
if Hugging_face_token:
    login(Hugging_face_token)
18
+
19
+
20
# Gemini model name shared by every LLM request in this module.
GEMINI_MODEL = "gemini-2.0-flash"

# Google API key taken from the environment (None when the variable is unset).
GEMINI_API_KEY = os.getenv("google_api_key")

# Image-captioning model and its matching pre-processor, both loaded from the
# same checkpoint.
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(_BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)
29
+
30
class stable_dif:
    """Pick and load a Stable Diffusion pipeline by size label.

    Supported labels are ``'small'``, ``'medium'`` and ``'large'``.
    """

    # Checkpoint id loaded for each supported size label.
    _CHECKPOINTS = {
        'small': "stable-diffusion-v1-5/stable-diffusion-v1-5",
        'medium': "stabilityai/stable-diffusion-3.5-medium",
        'large': "stabilityai/stable-diffusion-3.5-large-turbo",
    }

    def __init__(self, sizes):
        """Remember the requested size label; nothing is loaded yet."""
        self.sizes = sizes

    def model(self):
        """Load and return the pipeline for ``self.sizes``.

        Returns:
            The pipeline object produced by ``from_pretrained``.

        Raises:
            ValueError: If ``self.sizes`` is not a supported label.
        """
        # Validate first so an unknown label fails with a clear message
        # instead of an unbound local variable.
        if self.sizes not in self._CHECKPOINTS:
            raise ValueError(
                f"Unknown model size {self.sizes!r}; "
                f"expected one of {sorted(self._CHECKPOINTS)}"
            )

        checkpoint = self._CHECKPOINTS[self.sizes]
        # The v1.5 checkpoint is loaded through the generic pipeline class;
        # the 3.5 checkpoints use the dedicated Stable Diffusion 3 pipeline.
        if self.sizes == 'small':
            return DiffusionPipeline.from_pretrained(checkpoint)
        return StableDiffusion3Pipeline.from_pretrained(checkpoint)
42
+
43
# Build the text-to-image pipeline once, at import time, using the 'small'
# size label.
stable = stable_dif('small')
pipe = stable.model()
45
+
46
+
47
+
48
def image_story_generator(image, requirement, style):
    """Turn an image into a four-chapter story with one generated picture each.

    The image is captioned with the module-level BLIP ``processor``/``model``,
    the caption and ``requirement`` are sent to Gemini to write the story,
    Gemini is then asked for four Stable Diffusion prompts in the requested
    ``style``, and each prompt is rendered with the module-level ``pipe``.

    Args:
        image: Path of the source image file.
        requirement: Free-text requirements the story must fit.
        style: Visual style every generated image should share.

    Returns:
        A ``(images, story)`` tuple: the generated images in prompt order and
        the story text with newlines replaced by spaces.

    Raises:
        ValueError: If no image was supplied.
    """
    # Fail early with a readable message instead of an error from inside PIL.
    if image is None:
        raise ValueError('Please upload an image before generating a story.')

    # convert() reads the pixel data, so the file can be closed right away.
    with Image.open(image) as source:
        raw_image = source.convert('RGB')

    # get caption from image
    inputs = processor(raw_image, return_tensors="pt")
    out = model.generate(**inputs, min_length=20)
    model_prompt = processor.decode(out[0], skip_special_tokens=True)

    # Both requests use the same settings, so one Gemini client is enough.
    llm = ChatGoogleGenerativeAI(
        google_api_key=GEMINI_API_KEY, model=GEMINI_MODEL, temperature=0.3
    )

    # Adjacent literals with explicit spaces: a backslash continuation inside
    # a string would embed whatever indentation follows it (or glue words
    # together when there is none).
    story_query = (
        f' Write a 4 chapters story based on {model_prompt} and '
        f'that fits the following requirements: {requirement}. Give a detailed '
        'description of the charaters appearences.'
    )
    story = llm.invoke(story_query).content.replace('\n', ' ')

    # One schema entry per chapter so the reply can be parsed into a dict.
    schemas = [
        ResponseSchema(name=f'prompt {number}', description='the prompt')
        for number in range(1, 5)
    ]
    parser = StructuredOutputParser.from_response_schemas(schemas)
    instructions = parser.get_format_instructions()

    prompt_query = (
        f' Based on this story: {story}. Create 4 prompts for stable diffusion that tells of a maximum of 77 tokens '
        'what happens in each chapters. Describe the characters everytime their name is mentioned. '
        f'Each image should be created in the same exact style {style}.'
    ) + '\n\n' + instructions

    image_prompts = parser.parse(llm.invoke(prompt_query).content)

    # Walk the declared schema names rather than the returned keys: this keeps
    # chapter order and ignores any extra keys the LLM may have added.
    images = [pipe(image_prompts[schema.name]).images[0] for schema in schemas]

    return images, story
98
+
99
# gradio
interface = gr.Interface(
    fn=image_story_generator,
    # Textbox's first positional argument is its initial *value*; the hint
    # texts are passed as labels so they are not submitted as real input.
    inputs=[
        gr.Image(type='filepath'),
        gr.Textbox(label='enter story requirements'),
        gr.Textbox(label='pick a style for the images'),
    ],
    outputs=[
        gr.Gallery(),
        gr.Textbox(label='story'),
    ],
    description='Upload an image to start the story generation process.',
)

interface.launch()
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ diffusers==0.32.2
2
+ gradio==5.15.0
3
+ # huggingface_hub==0.26.2  # superseded by the 0.28.1 pin below; pip cannot satisfy two different pins
4
+ huggingface_hub==0.28.1
5
+ langchain==0.3.18
6
+ langchain_google_genai==2.0.9
7
+ Pillow==11.1.0
8
+ python-dotenv==1.0.1
9
+ # transformers==4.46.1  # superseded by the 4.48.3 pin below; pip cannot satisfy two different pins
10
+ transformers==4.48.3