Nrnaidu committed on
Commit
b39fe4b
·
verified ·
1 Parent(s): a155921

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -36
app.py CHANGED
@@ -2,105 +2,112 @@ import whisper
2
  import gradio as gr
3
  from groq import Groq
4
  from deep_translator import GoogleTranslator
5
- from PIL import Image
6
  import os
 
 
 
 
7
  import requests
8
  import io
9
  import time
10
-
11
  # Groq client setup: the API key is read from the GROQ_API_KEY environment
  # variable and passed to the shared `client` used for transcription and
  # chat completions below.
12
  api_key = os.getenv("GROQ_API_KEY")
13
  client = Groq(api_key=api_key)
14
-
15
  # Hugging Face inference endpoint for image generation. The token is read
  # from the Hugging_api_key environment variable and sent as a Bearer token.
  # NOTE(review): os.getenv returns None when the variable is unset, which
  # would make the header value the literal "Bearer None".
16
  H_key = os.getenv("Hugging_api_key")
17
  API_URL = "https://api-inference.huggingface.co/models/Artples/LAI-ImageGeneration-vSDXL-2"
18
  headers = {"Authorization": f"Bearer {H_key}"}
19
-
 
20
  # Function for querying image generation with retries
21
  def query_image_generation(payload, max_retries=5):
22
  for attempt in range(max_retries):
23
  response = requests.post(API_URL, headers=headers, json=payload)
24
-
25
  if response.status_code == 503:
26
  print(f"Model is still loading, retrying... Attempt {attempt + 1}/{max_retries}")
27
  estimated_time = min(response.json().get("estimated_time", 60), 60)
28
  time.sleep(estimated_time)
29
  continue
30
-
31
  if response.status_code != 200:
32
  print(f"Error: Received status code {response.status_code}")
33
  print(f"Response: {response.text}")
34
  return None
35
-
36
  return response.content
37
-
38
  print(f"Failed to generate image after {max_retries} attempts.")
39
  return None
40
-
41
  # Function for generating an image from text
42
  def generate_image(prompt):
43
  image_bytes = query_image_generation({"inputs": prompt})
44
-
45
  if image_bytes is None:
46
  return None
47
-
48
  try:
49
  image = Image.open(io.BytesIO(image_bytes)) # Opening the image from bytes
50
  return image
51
  except Exception as e:
52
  print(f"Error: {e}")
53
  return None
54
-
 
55
  # Function for generating creative text with the Groq chat-completions API
56
  def generate_creative_text(prompt):
57
  chat_completion = client.chat.completions.create(
58
- messages=[
59
- {"role": "user", "content": prompt}
60
- ],
61
- model="llama-3.2-90b-text-preview"
62
- )
63
  chatbot_response = chat_completion.choices[0].message.content
64
  return chatbot_response
65
-
 
66
  def process_audio(audio_path, image_option, creative_text_option):
67
  if audio_path is None:
68
  return "Please upload an audio file.", None, None, None
69
-
70
  # Step 1: Transcribe audio
71
  try:
72
  with open(audio_path, "rb") as file:
73
  transcription = client.audio.transcriptions.create(
74
  file=(os.path.basename(audio_path), file.read()),
75
  model="whisper-large-v3",
76
- language="kn", # Change to Kannada
77
  response_format="verbose_json",
78
  )
79
- kannada_text = transcription.text # Changed from tamil_text to kannada_text
80
  except Exception as e:
81
  return f"An error occurred during transcription: {str(e)}", None, None, None
82
-
83
- # Step 2: Translate Kannada to English
84
  try:
85
- translator = GoogleTranslator(source='kn', target='en')
86
- translation = translator.translate(kannada_text) # Changed from tamil_text to kannada_text
87
  except Exception as e:
88
- return kannada_text, f"An error occurred during translation: {str(e)}", None, None
89
-
90
  # Step 3: Generate creative text (if selected)
91
  creative_text = None
92
  if creative_text_option == "Generate Creative Text":
93
  creative_text = generate_creative_text(translation)
94
-
95
  # Step 4: Generate image (if selected)
96
  image = None
97
  if image_option == "Generate Image":
98
  image = generate_image(translation)
99
  if image is None:
100
- return kannada_text, translation, creative_text, "An error occurred during image generation."
101
-
102
- return kannada_text, translation, creative_text, image
103
-
 
104
  # Create Gradio interface
105
  with gr.Blocks(theme=gr.themes.Base()) as iface:
106
  gr.Markdown("# Audio Transcription, Translation, Image & Creative Text Generation")
@@ -111,15 +118,15 @@ with gr.Blocks(theme=gr.themes.Base()) as iface:
111
  creative_text_option = gr.Dropdown(["Generate Creative Text", "Skip Creative Text"], label="Creative Text Generation", value="Generate Creative Text")
112
  submit_button = gr.Button("Process Audio")
113
  with gr.Column():
114
- kannada_text_output = gr.Textbox(label="Kannada Transcription") # Changed to Kannada
115
  translation_output = gr.Textbox(label="English Translation")
116
  creative_text_output = gr.Textbox(label="Creative Text")
117
  image_output = gr.Image(label="Generated Image")
118
  submit_button.click(
119
  fn=process_audio,
120
  inputs=[audio_input, image_option, creative_text_option],
121
- outputs=[kannada_text_output, translation_output, creative_text_output, image_output]
122
  )
123
-
124
  # Launch the interface
125
- iface.launch()
 
2
  import gradio as gr
3
  from groq import Groq
4
  from deep_translator import GoogleTranslator
5
+ from diffusers import StableDiffusionPipeline
6
  import os
7
+ import torch
8
+ import openai
9
+ from huggingface_hub import InferenceApi
10
+ from PIL import Image
11
  import requests
12
  import io
13
  import time
14
+
15
  # Groq client setup: the API key is read from the GROQ_API_KEY environment
  # variable and passed to the shared `client` used for transcription and
  # chat completions below.
16
  api_key = os.getenv("GROQ_API_KEY")
17
  client = Groq(api_key=api_key)
18
+
19
  # Hugging Face inference endpoint for image generation. The token is read
  # from the Hugging_api_key environment variable and sent as a Bearer token.
  # NOTE(review): os.getenv returns None when the variable is unset, which
  # would make the header value the literal "Bearer None".
20
  H_key = os.getenv("Hugging_api_key")
21
  API_URL = "https://api-inference.huggingface.co/models/Artples/LAI-ImageGeneration-vSDXL-2"
22
  headers = {"Authorization": f"Bearer {H_key}"}
23
+
24
+
25
  # Function for querying image generation with retries
26
  def query_image_generation(payload, max_retries=5):
27
  for attempt in range(max_retries):
28
  response = requests.post(API_URL, headers=headers, json=payload)
29
+
30
  if response.status_code == 503:
31
  print(f"Model is still loading, retrying... Attempt {attempt + 1}/{max_retries}")
32
  estimated_time = min(response.json().get("estimated_time", 60), 60)
33
  time.sleep(estimated_time)
34
  continue
35
+
36
  if response.status_code != 200:
37
  print(f"Error: Received status code {response.status_code}")
38
  print(f"Response: {response.text}")
39
  return None
40
+
41
  return response.content
42
+
43
  print(f"Failed to generate image after {max_retries} attempts.")
44
  return None
45
+
46
  # Function for generating an image from text
47
  def generate_image(prompt):
48
  image_bytes = query_image_generation({"inputs": prompt})
49
+
50
  if image_bytes is None:
51
  return None
52
+
53
  try:
54
  image = Image.open(io.BytesIO(image_bytes)) # Opening the image from bytes
55
  return image
56
  except Exception as e:
57
  print(f"Error: {e}")
58
  return None
59
+
60
+
61
  # Function for generating creative text with the Groq chat-completions API
62
  def generate_creative_text(prompt):
63
  chat_completion = client.chat.completions.create(
64
+ messages=[
65
+ {"role": "user", "content":prompt}
66
+ ],
67
+ model="llama-3.2-90b-text-preview"
68
+ )
69
  chatbot_response = chat_completion.choices[0].message.content
70
  return chatbot_response
71
+
72
+
73
  def process_audio(audio_path, image_option, creative_text_option):
74
  if audio_path is None:
75
  return "Please upload an audio file.", None, None, None
76
+
77
  # Step 1: Transcribe audio
78
  try:
79
  with open(audio_path, "rb") as file:
80
  transcription = client.audio.transcriptions.create(
81
  file=(os.path.basename(audio_path), file.read()),
82
  model="whisper-large-v3",
83
+ language="ta",
84
  response_format="verbose_json",
85
  )
86
+ tamil_text = transcription.text
87
  except Exception as e:
88
  return f"An error occurred during transcription: {str(e)}", None, None, None
89
+ # Step 2: Translate Tamil to English
 
90
  try:
91
+ translator = GoogleTranslator(source='ta', target='en')
92
+ translation = translator.translate(tamil_text)
93
  except Exception as e:
94
+ return tamil_text, f"An error occurred during translation: {str(e)}", None, None
95
+
96
  # Step 3: Generate creative text (if selected)
97
  creative_text = None
98
  if creative_text_option == "Generate Creative Text":
99
  creative_text = generate_creative_text(translation)
100
+
101
  # Step 4: Generate image (if selected)
102
  image = None
103
  if image_option == "Generate Image":
104
  image = generate_image(translation)
105
  if image is None:
106
+ return tamil_text, translation, creative_text, f"An error occurred during image generation"
107
+
108
+ return tamil_text, translation, creative_text, image
109
+
110
+
111
  # Create Gradio interface
112
  with gr.Blocks(theme=gr.themes.Base()) as iface:
113
  gr.Markdown("# Audio Transcription, Translation, Image & Creative Text Generation")
 
118
  creative_text_option = gr.Dropdown(["Generate Creative Text", "Skip Creative Text"], label="Creative Text Generation", value="Generate Creative Text")
119
  submit_button = gr.Button("Process Audio")
120
  with gr.Column():
121
+ tamil_text_output = gr.Textbox(label="Tamil Transcription")
122
  translation_output = gr.Textbox(label="English Translation")
123
  creative_text_output = gr.Textbox(label="Creative Text")
124
  image_output = gr.Image(label="Generated Image")
125
  submit_button.click(
126
  fn=process_audio,
127
  inputs=[audio_input, image_option, creative_text_option],
128
+ outputs=[tamil_text_output, translation_output, creative_text_output, image_output]
129
  )
130
+
131
  # Launch the interface
132
+ iface.launch()