UmaGeeth committed on
Commit
4e40230
Β·
verified Β·
1 Parent(s): fbd584b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -48
app.py CHANGED
@@ -1,7 +1,10 @@
1
  import gradio as gr
2
  from PIL import Image
3
  import torch
4
- from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForCausalLM
 
 
 
5
  from gtts import gTTS
6
  import tempfile
7
  import os
@@ -9,51 +12,38 @@ import os
9
  # Set device
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
 
12
- # Load BLIP for image captioning
13
  blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
14
  blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
15
 
16
- # Load language model (falcon-rw-1b preferred, fallback to gpt2)
17
  gpt_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b", trust_remote_code=True)
18
  gpt_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-rw-1b", trust_remote_code=True).to(device)
19
 
20
- # Map for gTTS language codes
21
- LANG_CODE_MAP = {
22
- "English": "en",
23
- "Hindi": "hi",
24
- "Tamil": "ta",
25
- "Telugu": "te",
26
- "Malayalam": "ml",
27
- "Kannada": "kn",
28
- "Marathi": "mr",
29
- "Bengali": "bn"
30
- }
31
-
32
- # Generate caption from image
33
  def generate_caption(image):
34
  inputs = blip_processor(image, return_tensors="pt").to(device)
35
  out = blip_model.generate(**inputs)
36
  caption = blip_processor.decode(out[0], skip_special_tokens=True)
37
  return caption
38
 
39
- # Generate story or poem
40
- def generate_text(caption, theme, characters, language, content_type):
41
  if content_type.lower() == "story":
42
  prompt = f"{caption}. This inspired a story about {theme.lower()}"
43
  if characters:
44
  prompt += f" involving {characters}"
45
  prompt += ". It begins like this:\n"
46
- else: # poem
47
  prompt = f"{caption}. A poem themed around '{theme}'"
48
  if characters:
49
  prompt += f", mentioning {characters}"
50
  prompt += ":\n"
51
 
52
  input_ids = gpt_tokenizer.encode(prompt, return_tensors="pt").to(device)
53
-
54
  output_ids = gpt_model.generate(
55
  input_ids,
56
- max_length=250,
57
  do_sample=True,
58
  temperature=0.9,
59
  top_k=50,
@@ -66,56 +56,49 @@ def generate_text(caption, theme, characters, language, content_type):
66
  return output[len(prompt):].strip()
67
 
68
  # Main function
69
- def generate_output(image, theme, characters, language, content_type):
70
- if language not in LANG_CODE_MAP:
71
- return "Unsupported language", None, None
72
-
73
  caption = generate_caption(image)
74
- generated_text = generate_text(caption, theme, characters, language, content_type)
75
 
76
- # Save text to file
77
  txt_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8")
78
  txt_file.write(generated_text)
79
  txt_file.close()
80
 
81
- # Generate audio with gTTS
82
- lang_code = LANG_CODE_MAP[language]
83
- tts = gTTS(text=generated_text, lang=lang_code)
84
  audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
85
- tts.save(audio_path)
 
 
 
 
86
 
87
  return generated_text, txt_file.name, audio_path
88
 
89
- # UI with Gradio Blocks
90
- with gr.Blocks(title="Multimodal Story & Poem Generator") as demo:
91
- gr.Markdown("## 🎭 Multimodal Story & Poem Generator")
92
- gr.Markdown("Upload an image, choose a theme and language, and get a creative story or poem with audio!")
93
 
94
  with gr.Row():
95
  image = gr.Image(type="pil", label="πŸ–ΌοΈ Upload Image")
96
 
97
-
98
  with gr.Row():
99
- theme = gr.Textbox(label="🎨 Enter a Theme (e.g., Friendship, Mystery, Dreams)")
100
  characters = gr.Textbox(label="πŸ§‘β€πŸ€β€πŸ§‘ Characters (Optional)")
101
 
102
  content_type = gr.Radio(["Poem", "Story"], label="πŸ“ Choose Content Type")
103
  generate_btn = gr.Button("✨ Generate")
104
 
105
- output_text = gr.Textbox(label="πŸ“œ Generated Text", lines=10)
106
- txt_file = gr.File(label="πŸ“„ Download .txt")
107
- audio_file = gr.Audio(label="πŸ”Š Listen / Download Audio")
108
 
109
  generate_btn.click(
110
- fn=generate_output,
111
- inputs=[image, theme, characters, content_type],
112
- outputs=[output_text, txt_file, audio_file]
113
  )
114
 
115
-
116
-
117
- # Launch the app
118
  if __name__ == "__main__":
119
  demo.launch()
120
-
121
-
 
1
  import gradio as gr
2
  from PIL import Image
3
  import torch
4
+ from transformers import (
5
+ BlipProcessor, BlipForConditionalGeneration,
6
+ AutoTokenizer, AutoModelForCausalLM
7
+ )
8
  from gtts import gTTS
9
  import tempfile
10
  import os
 
12
  # Set device
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
 
15
+ # Load BLIP model for image captioning
16
  blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
17
  blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
18
 
19
+ # Load Falcon model for story/poem generation
20
  gpt_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-rw-1b", trust_remote_code=True)
21
  gpt_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-rw-1b", trust_remote_code=True).to(device)
22
 
23
+ # Generate image caption
 
 
 
 
 
 
 
 
 
 
 
 
24
  def generate_caption(image):
25
  inputs = blip_processor(image, return_tensors="pt").to(device)
26
  out = blip_model.generate(**inputs)
27
  caption = blip_processor.decode(out[0], skip_special_tokens=True)
28
  return caption
29
 
30
+ # Generate story or poem from caption, theme, characters
31
+ def generate_text(caption, theme, characters, content_type):
32
  if content_type.lower() == "story":
33
  prompt = f"{caption}. This inspired a story about {theme.lower()}"
34
  if characters:
35
  prompt += f" involving {characters}"
36
  prompt += ". It begins like this:\n"
37
+ else:
38
  prompt = f"{caption}. A poem themed around '{theme}'"
39
  if characters:
40
  prompt += f", mentioning {characters}"
41
  prompt += ":\n"
42
 
43
  input_ids = gpt_tokenizer.encode(prompt, return_tensors="pt").to(device)
 
44
  output_ids = gpt_model.generate(
45
  input_ids,
46
+ max_length=300,
47
  do_sample=True,
48
  temperature=0.9,
49
  top_k=50,
 
56
  return output[len(prompt):].strip()
57
 
58
  # Main function
59
+ def generate_output(image, theme, characters, content_type):
 
 
 
60
  caption = generate_caption(image)
61
+ generated_text = generate_text(caption, theme, characters, content_type)
62
 
63
+ # Save text to .txt file
64
  txt_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8")
65
  txt_file.write(generated_text)
66
  txt_file.close()
67
 
68
+ # Generate audio with gTTS (English only)
 
 
69
  audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3").name
70
+ try:
71
+ tts = gTTS(text=generated_text, lang="en")
72
+ tts.save(audio_path)
73
+ except Exception as e:
74
+ return f"Audio generation error: {str(e)}", txt_file.name, None
75
 
76
  return generated_text, txt_file.name, audio_path
77
 
78
+ # Gradio UI
79
+ with gr.Blocks(title="AI Story & Poem Generator") as demo:
80
+ gr.Markdown("## 🎭 AI Story & Poem Generator")
81
+ gr.Markdown("Upload an image, enter a theme and characters, and get a creative story or poem with audio!")
82
 
83
  with gr.Row():
84
  image = gr.Image(type="pil", label="πŸ–ΌοΈ Upload Image")
85
 
 
86
  with gr.Row():
87
+ theme = gr.Textbox(label="🎨 Theme (e.g., Adventure, Friendship, Dreams)")
88
  characters = gr.Textbox(label="πŸ§‘β€πŸ€β€πŸ§‘ Characters (Optional)")
89
 
90
  content_type = gr.Radio(["Poem", "Story"], label="πŸ“ Choose Content Type")
91
  generate_btn = gr.Button("✨ Generate")
92
 
93
+ output_text = gr.Textbox(label="πŸ“œ Generated Output", lines=10)
94
+ txt_file = gr.File(label="πŸ“„ Download Text")
95
+ audio_file = gr.Audio(label="πŸ”Š Listen to Audio")
96
 
97
  generate_btn.click(
98
+ fn=generate_output,
99
+ inputs=[image, theme, characters, content_type],
100
+ outputs=[output_text, txt_file, audio_file]
101
  )
102
 
 
 
 
103
  if __name__ == "__main__":
104
  demo.launch()