Abhaykumar04 commited on
Commit
d5f0495
·
verified ·
1 Parent(s): 1567604

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -0
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import assemblyai as aai
3
+ from together import Together
4
+ import base64
5
+ from io import BytesIO
6
+ from PIL import Image
7
+ import os
8
+ import yaml
9
+
10
+
11
# Function to load API credentials
def load_credentials():
    """Resolve the AssemblyAI and Together API keys.

    Environment variables take precedence; any key still missing is read
    from a local ``API.yml`` file (expected keys: ``assemblyai`` and
    ``Together_api``).

    Returns:
        tuple: ``(assemblyai_key, together_key)`` — either value may come
        from the environment or the file; ``(None, None)`` when the file
        fallback fails.
    """
    assemblyai_key = os.getenv("ASSEMBLYAI_API_KEY")
    together_key = os.getenv("TOGETHER_API_KEY")

    if not assemblyai_key or not together_key:
        try:
            with open('API.yml', 'r') as file:
                api_creds = yaml.safe_load(file)
            # Environment variables win; the file only fills in the gaps.
            assemblyai_key = assemblyai_key or api_creds['assemblyai']
            together_key = together_key or api_creds['Together_api']
        except (OSError, KeyError, TypeError, yaml.YAMLError) as e:
            # Narrowed from a bare `except Exception`: only missing-file,
            # malformed-YAML, or missing-key errors are expected here.
            print(f"Failed to load API credentials: {str(e)}")
            return None, None

    return assemblyai_key, together_key
27
+
28
# Initialize API clients
ASSEMBLYAI_API_KEY, TOGETHER_API_KEY = load_credentials()

if ASSEMBLYAI_API_KEY and TOGETHER_API_KEY:
    # Configure the AssemblyAI SDK globally and build the Together client
    # used by transcribe_audio() / generate_image() below.
    aai.settings.api_key = ASSEMBLYAI_API_KEY
    together_client = Together(api_key=TOGETHER_API_KEY)
else:
    # Fail fast at import time: the app cannot do anything useful
    # without both keys.
    raise ValueError("API credentials not found. Please check your configuration.")
36
+
37
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* via AssemblyAI.

    Returns the transcript text on success, or an error string
    (prefixed with "Error") on failure.
    """
    try:
        return aai.Transcriber().transcribe(audio_path).text
    except Exception as e:
        return f"Error in transcription: {str(e)}"
45
+
46
def generate_image(prompt):
    """Render *prompt* into a 1024x768 image with Together AI's FLUX model.

    Returns a PIL Image on success, or an error string (prefixed with
    "Error") on failure.
    """
    try:
        reply = together_client.images.generate(
            prompt=prompt,
            model="black-forest-labs/FLUX.1-schnell-Free",
            width=1024,
            height=768,
            steps=4,
            n=1,
            response_format="b64_json"
        )
        # The API returns the image as base64 — decode the first (only)
        # result and wrap it as a PIL Image.
        payload = base64.b64decode(reply.data[0].b64_json)
        return Image.open(BytesIO(payload))
    except Exception as e:
        return f"Error in image generation: {str(e)}"
64
+
65
def process_audio(audio, progress=gr.Progress()):
    """Pipeline entry point: transcribe *audio*, then render the text as art.

    Returns ``(image, transcript)`` on success, or ``(None, message)``
    when input is missing or either stage fails.
    """
    if audio is None:
        return None, "Please provide an audio input."

    progress(0.3, desc="Transcribing audio...")
    transcribed_text = transcribe_audio(audio)

    # transcribe_audio signals failure with an "Error"-prefixed string.
    if not isinstance(transcribed_text, str) or transcribed_text.startswith("Error"):
        return None, f"Transcription failed: {transcribed_text}"

    progress(0.6, desc="Generating image...")
    generated_image = generate_image(transcribed_text)

    # generate_image returns a PIL Image on success, an error string otherwise.
    if not isinstance(generated_image, Image.Image):
        return None, f"Image generation failed: {generated_image}"

    progress(1.0, desc="Complete!")
    return generated_image, transcribed_text
84
+
85
# Custom CSS for better styling
# Centers the header text and constrains the overall layout width; passed
# to gr.Blocks(css=...) in create_interface(), where the #app-title and
# #app-subtitle ids are used by the gr.HTML header.
# NOTE(review): #main-container does not match any elem_id set in this
# file — confirm whether it is still needed.
custom_css = """
#app-title {
    text-align: center;
    margin-bottom: 10px;
}
#app-subtitle {
    text-align: center;
    margin-bottom: 30px;
}
#main-container {
    max-width: 1200px;
    margin: auto;
}
"""
100
+
101
# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks app: audio in, image + transcript out."""
    with gr.Blocks(css=custom_css, theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="blue",
        neutral_hue="slate"
    )) as app:
        # Static header; the div ids are styled by custom_css.
        gr.HTML(
            """
            <div id="app-title">
                <h1> Voice Generated Visions </h1>
            </div>
            <div id="app-subtitle">
                <h3>✨ Transform Your Words into Stunning Visual Art ✨</h3>
            </div>
            """
        )

        with gr.Row():
            # Left column: audio input (mic or file) and the trigger button.
            with gr.Column():
                audio_input = gr.Audio(
                    label="Record or Upload Audio",
                    sources=["microphone", "upload"],
                    # "filepath" so process_audio receives a path it can
                    # hand to the AssemblyAI transcriber.
                    type="filepath"
                )
                submit_btn = gr.Button("🚀 Generate Vision", variant="primary")

            # Right column: generated image and the transcribed text.
            with gr.Column():
                output_image = gr.Image(label="Generated Image 🖼️")
                output_text = gr.Textbox(
                    label="Transcribed Text 📝",
                    placeholder="Your speech will appear here...",
                    lines=3
                )

        # Add usage instructions
        with gr.Accordion("ℹ️ How to Use"):
            gr.Markdown("""
            1. **Record or Upload** 🎤
               - Click the microphone icon to record your voice
               - Or upload an audio file from your device

            2. **Generate** 🎨
               - Click 'Generate Vision' to process your audio
               - Wait for the magic to happen!

            3. **Results** ✨
               - View your transcribed text
               - See your words transformed into art
            """)

        # Wire the button to the pipeline: audio path in,
        # (image, transcript) out.
        submit_btn.click(
            fn=process_audio,
            inputs=[audio_input],
            outputs=[output_image, output_text]
        )

    return app
159
+
160
# Launch the app
if __name__ == "__main__":
    # share=True exposes a public Gradio link in addition to localhost.
    create_interface().launch(share=True)