Anshu13 committed on
Commit
e5b2062
·
verified ·
1 Parent(s): 61e8692

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py.py +117 -0
  2. requirements.txt +0 -0
app.py.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ from PIL import Image
4
+ import whisper
5
+ from transformers import AutoProcessor, AutoModelForImageTextToText
6
+
7
# Hub id of the checkpoint; the processor and the model are both loaded
# from it, so it is named once here.
MODEL_ID = "deepseek-community/Janus-Pro-1B"

# Loaded once at import time and reused by the prompt-generation functions.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID)

# Whisper "base" model, used to transcribe uploaded audio.
whisper_model = whisper.load_model("base")
10
+
11
def build_instruction(user_text):
    """Wrap *user_text* in the prompt-engineering instruction for the model.

    Args:
        user_text: The user's raw idea. ``None`` is treated as an empty
            string and surrounding whitespace is removed, so neither the
            literal text "None" nor stray blank space ends up on the
            ``Input:`` line.

    Returns:
        The complete instruction string with the cleaned user text on the
        ``Input:`` line.
    """
    cleaned = "" if user_text is None else str(user_text).strip()
    # The length requirement used to read "should e detailed prompt about
    # words ranging from 50 to 70"; it is reworded so the model receives an
    # unambiguous instruction.
    return f"""
You are a professional AI prompt engineer.

Convert the input into a highly detailed AI generation prompt.

Include:
- Subject
- Environment
- Summary

Make it visually rich and optimized for all existing AI models.
The prompt should be detailed, ranging from 50 to 70 words.

Input: {cleaned}

Return only the final prompt.
"""
29
def text_to_prompt(user_text):
    """Generate an expanded prompt from plain text.

    Builds the instruction for *user_text*, encodes it with the
    module-level ``processor``, generates up to 150 new tokens with
    ``model`` and decodes the result.

    Returns:
        The decoded text with special tokens skipped.
    """
    encoded = processor(
        text=build_instruction(user_text),
        return_tensors="pt",
    ).to(model.device)

    # Skip the first `prompt_length` positions of the generated sequence so
    # only what follows the encoded instruction is decoded.
    prompt_length = encoded.input_ids.shape[1]
    sequences = model.generate(**encoded, max_new_tokens=150)
    continuation = sequences[0][prompt_length:]

    return processor.decode(continuation, skip_special_tokens=True)
42
+
43
def image_text_to_prompt(image_path, user_text):
    """Generate an expanded prompt from an image plus accompanying text.

    Args:
        image_path: Filesystem path of the image to load.
        user_text: The user's idea, inserted into the instruction.

    Returns:
        The decoded text with special tokens skipped.
    """
    # Image.open() is lazy and keeps the underlying file open. convert()
    # forces the pixel data to be read, so the handle can be released as
    # soon as the ``with`` block exits instead of leaking per request.
    # Converting to RGB also normalises palette, alpha and greyscale uploads.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    instruction = build_instruction(user_text)

    # NOTE(review): confirm this processor accepts plain instruction text
    # alongside `images` without an image placeholder / chat template.
    inputs = processor(
        images=image,
        text=instruction,
        return_tensors="pt",
    ).to(model.device)

    # Skip the first `input_len` positions of the generated sequence so only
    # what follows the encoded inputs is decoded.
    input_len = inputs.input_ids.shape[1]
    output = model.generate(**inputs, max_new_tokens=150)
    generated_tokens = output[0][input_len:]

    return processor.decode(generated_tokens, skip_special_tokens=True)
58
+
59
def audio_to_prompt(audio_path):
    """Transcribe an audio file and turn the transcript into a prompt.

    Args:
        audio_path: Filesystem path of the audio file to transcribe.

    Returns:
        The prompt generated from the transcript, or a short message when
        the transcript is empty.
    """
    result = whisper_model.transcribe(audio_path)
    transcript = result["text"].strip()

    # With nothing transcribed there is no idea to expand; tell the user
    # instead of asking the model to elaborate on empty input.
    if not transcript:
        return "No speech detected in the audio"

    return text_to_prompt(transcript)
63
+
64
def generate_prompt_ui(input_type, text, image, audio):
    """Route a UI request to the handler for the selected input type.

    Args:
        input_type: The selected mode: "Text", "Image + Text" or "Audio".
        text: Contents of the text box; may be empty or ``None``.
        image: Path of the uploaded image, or ``None`` if none was given.
        audio: Path of the uploaded audio, or ``None`` if none was given.

    Returns:
        The generated prompt, or a short message describing what is
        missing or that the input type is not recognised.
    """
    if input_type == "Text":
        # Validate like the image/audio branches do, rather than running
        # the model on an empty idea.
        if text is None or not text.strip():
            return "Please enter some text"
        return text_to_prompt(text)

    if input_type == "Image + Text":
        if image is None:
            return "Please upload an image"
        return image_text_to_prompt(image, text)

    if input_type == "Audio":
        if audio is None:
            return "Please upload audio"
        return audio_to_prompt(audio)

    return "Invalid input"
80
+
81
with gr.Blocks() as app:
    gr.Markdown("# 🧠 AI Prompt Generator")

    # Pre-select "Text" so pressing the button before touching the radio
    # does not fall through to "Invalid input".
    input_type = gr.Radio(
        ["Text", "Image + Text", "Audio"],
        value="Text",
        label="Select Input Type",
    )

    text_input = gr.Textbox(label="Enter your idea/prompt")

    # Image and audio start hidden so the first render matches what
    # update_inputs() would produce for the default "Text" selection.
    image_input = gr.Image(
        type="filepath",
        label="Upload Image",
        visible=False,
    )
    audio_input = gr.Audio(
        type="filepath",
        label="Upload Audio",
        visible=False,
    )

    output = gr.Textbox(label="Generated Prompt")

    generate_btn = gr.Button("Generate Prompt 🚀")

    def update_inputs(choice):
        """Return visibility updates for the text, image and audio inputs."""
        return (
            gr.update(visible=choice in ("Text", "Image + Text")),
            gr.update(visible=choice == "Image + Text"),
            gr.update(visible=choice == "Audio"),
        )

    # Show only the inputs that the selected mode actually uses.
    input_type.change(
        fn=update_inputs,
        inputs=input_type,
        outputs=[text_input, image_input, audio_input],
    )

    generate_btn.click(
        fn=generate_prompt_ui,
        inputs=[input_type, text_input, image_input, audio_input],
        outputs=output,
    )

app.launch()
requirements.txt ADDED
File without changes