Nrnaidu committed on
Commit
c6ebc5f
·
verified ·
1 Parent(s): 80c898c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -55
app.py CHANGED
@@ -1,28 +1,76 @@
1
- import os
2
- import io
3
- import requests
4
  import gradio as gr
5
  from groq import Groq
6
- from transformers import MarianMTModel, MarianTokenizer, AutoModelForCausalLM, AutoTokenizer
7
  from deep_translator import GoogleTranslator
8
- from PIL import Image, ImageDraw
9
- import joblib
10
- import time
11
  import torch
12
- import warnings
13
  from huggingface_hub import InferenceApi
14
- from diffusers import StableDiffusionPipeline
15
-
16
- # Load text generation model and tokenizer
17
- device = "cuda" if torch.cuda.is_available() else "cpu"
18
- text_generation_model = AutoModelForCausalLM.from_pretrained("gpt2").to(device) # Move model to the correct device
19
- text_generation_tokenizer = AutoTokenizer.from_pretrained("gpt2")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # Set the padding token
22
- text_generation_tokenizer.pad_token = text_generation_tokenizer.eos_token # Use EOS token as padding token
23
 
24
- # Function to transcribe, translate, analyze sentiment, and generate image
25
- def process_audio(audio_path, image_option):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  if audio_path is None:
27
  return "Please upload an audio file.", None, None, None
28
 
@@ -38,7 +86,6 @@ def process_audio(audio_path, image_option):
38
  tamil_text = transcription.text
39
  except Exception as e:
40
  return f"An error occurred during transcription: {str(e)}", None, None, None
41
-
42
  # Step 2: Translate Tamil to English
43
  try:
44
  translator = GoogleTranslator(source='ta', target='en')
@@ -46,58 +93,39 @@ def process_audio(audio_path, image_option):
46
  except Exception as e:
47
  return tamil_text, f"An error occurred during translation: {str(e)}", None, None
48
 
49
- # Step 3: Generate creative text
50
- def generate_creative_text(english_text):
51
- if not english_text:
52
- return "Please provide text to generate creative content."
53
-
54
- try:
55
- inputs = text_generation_tokenizer(english_text, return_tensors="pt", padding=True, truncation=True).to(device) # Move inputs to the same device
56
- generated_tokens = text_generation_model.generate(
57
- **inputs,
58
- max_length=60,
59
- num_return_sequences=1,
60
- no_repeat_ngram_size=3,
61
- temperature=0.7,
62
- top_p=0.9,
63
- do_sample=True,
64
- early_stopping=True
65
- )
66
- creative_text = text_generation_tokenizer.decode(generated_tokens[0], skip_special_tokens=True).strip()
67
- return creative_text
68
- except Exception as e:
69
- return f"An error occurred during text generation: {str(e)}"
70
-
71
- creative_text = generate_creative_text(translation)
72
 
73
  # Step 4: Generate image (if selected)
74
  image = None
75
  if image_option == "Generate Image":
76
- try:
77
- image = pipe(translation).images[0]
78
- except Exception as e:
79
- return tamil_text, translation, f"An error occurred during image generation: {str(e)}", None
 
80
 
81
- return tamil_text, translation, image, creative_text
82
 
83
  # Create Gradio interface
84
- with gr.Blocks() as iface:
85
- gr.Markdown("# Audio Transcription, Translation, and Image Generation")
86
  with gr.Row():
87
  with gr.Column():
88
  audio_input = gr.Audio(type="filepath", label="Upload Audio File")
89
  image_option = gr.Dropdown(["Generate Image", "Skip Image"], label="Image Generation", value="Generate Image")
 
90
  submit_button = gr.Button("Process Audio")
91
  with gr.Column():
92
- tamil_text_output = gr.Textbox(label="Tamil Transcription", interactive=False)
93
- translation_output = gr.Textbox(label="English Translation", interactive=False)
 
94
  image_output = gr.Image(label="Generated Image")
95
- creative_text_output = gr.Textbox(label="Creative Text", interactive=False)
96
-
97
  submit_button.click(
98
  fn=process_audio,
99
- inputs=[audio_input, image_option],
100
- outputs=[tamil_text_output, translation_output, image_output, creative_text_output]
101
  )
102
 
103
  # Launch the interface
 
1
+ import whisper
 
 
2
  import gradio as gr
3
  from groq import Groq
 
4
  from deep_translator import GoogleTranslator
5
+ from diffusers import StableDiffusionPipeline
6
+ import os
 
7
  import torch
8
+ import openai
9
  from huggingface_hub import InferenceApi
10
+ from PIL import Image
11
+ import requests
12
+ import io
13
+ import time
14
+
15
# --- API configuration (read from environment; never hard-code secrets) ---

# Groq client used for chat-completion (creative text) requests.
api_key = os.getenv("g_key")
client = Groq(api_key=api_key)

# Hugging Face Inference API endpoint used for text-to-image generation.
key = os.getenv("h_key")
API_URL = "https://api-inference.huggingface.co/models/Artples/LAI-ImageGeneration-vSDXL-2"
headers = {"Authorization": f"Bearer {key}"}
23
+
24
+
25
# Query the Hugging Face image-generation endpoint, retrying while the
# model is still loading (HTTP 503).
def query_image_generation(payload, max_retries=5, timeout=120):
    """POST ``payload`` to the HF inference API and return raw image bytes.

    Parameters
    ----------
    payload : dict
        JSON body for the inference request, e.g. ``{"inputs": prompt}``.
    max_retries : int
        Attempts to make while the model reports 503 (still loading).
    timeout : int | float
        Per-request timeout in seconds (prevents an indefinite hang when
        the endpoint stops responding).

    Returns
    -------
    bytes | None
        The response body (image bytes) on success, otherwise ``None``.
    """
    for attempt in range(max_retries):
        try:
            response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        except requests.RequestException as e:
            # Keep the function's None-on-failure contract instead of raising.
            print(f"Request failed: {e}")
            return None

        if response.status_code == 503:
            # Model is cold; the API usually suggests a wait. Cap it at 60s
            # and guard against a 503 body that is not valid JSON.
            print(f"Model is still loading, retrying... Attempt {attempt + 1}/{max_retries}")
            try:
                estimated_time = min(response.json().get("estimated_time", 60), 60)
            except ValueError:
                estimated_time = 60
            time.sleep(estimated_time)
            continue

        if response.status_code != 200:
            print(f"Error: Received status code {response.status_code}")
            print(f"Response: {response.text}")
            return None

        return response.content

    print(f"Failed to generate image after {max_retries} attempts.")
    return None
45
+
46
# Turn a text prompt into a PIL image via the HF inference endpoint.
def generate_image(prompt):
    """Generate an image for ``prompt``.

    Returns the decoded PIL image, or ``None`` when the remote request or
    the byte decoding fails.
    """
    raw = query_image_generation({"inputs": prompt})
    if raw is None:
        return None

    try:
        # Decode the returned bytes into a PIL image object.
        return Image.open(io.BytesIO(raw))
    except Exception as e:
        print(f"Error: {e}")
        return None
59
+
60
+
61
# Creative-text generation via the Groq chat-completions API.
def generate_creative_text(prompt):
    """Send ``prompt`` as a single-turn user message to the Groq chat model
    and return the assistant's reply text."""
    completion = client.chat.completions.create(
        model="llama-3.2-90b-text-preview",
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
71
+
72
+
73
+ def process_audio(audio_path, image_option, creative_text_option):
74
  if audio_path is None:
75
  return "Please upload an audio file.", None, None, None
76
 
 
86
  tamil_text = transcription.text
87
  except Exception as e:
88
  return f"An error occurred during transcription: {str(e)}", None, None, None
 
89
  # Step 2: Translate Tamil to English
90
  try:
91
  translator = GoogleTranslator(source='ta', target='en')
 
93
  except Exception as e:
94
  return tamil_text, f"An error occurred during translation: {str(e)}", None, None
95
 
96
+ # Step 3: Generate creative text (if selected)
97
+ creative_text = None
98
+ if creative_text_option == "Generate Creative Text":
99
+ creative_text = generate_creative_text(translation)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # Step 4: Generate image (if selected)
102
  image = None
103
  if image_option == "Generate Image":
104
+ image = generate_image(translation)
105
+ if image is None:
106
+ return tamil_text, translation, creative_text, f"An error occurred during image generation"
107
+
108
+ return tamil_text, translation, creative_text, image
109
 
 
110
 
111
  # Create Gradio interface
112
# Build the Gradio UI: inputs on the left column, results on the right.
with gr.Blocks(theme=gr.themes.Base()) as iface:
    gr.Markdown("# Audio Transcription, Translation, Image & Creative Text Generation")
    with gr.Row():
        with gr.Column():
            # Inputs: the audio clip plus per-feature opt-in dropdowns.
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            image_option = gr.Dropdown(
                choices=["Generate Image", "Skip Image"],
                label="Image Generation",
                value="Generate Image",
            )
            creative_text_option = gr.Dropdown(
                choices=["Generate Creative Text", "Skip Creative Text"],
                label="Creative Text Generation",
                value="Generate Creative Text",
            )
            submit_button = gr.Button("Process Audio")
        with gr.Column():
            # Outputs, in the same order process_audio returns them.
            tamil_text_output = gr.Textbox(label="Tamil Transcription")
            translation_output = gr.Textbox(label="English Translation")
            creative_text_output = gr.Textbox(label="Creative Text")
            image_output = gr.Image(label="Generated Image")

    # Wire the button to the processing pipeline.
    submit_button.click(
        fn=process_audio,
        inputs=[audio_input, image_option, creative_text_option],
        outputs=[tamil_text_output, translation_output, creative_text_output, image_output],
    )
130
 
131
  # Launch the interface