Nrnaidu committed on
Commit
f029b7e
·
verified ·
1 Parent(s): 03cad5f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Text-generation setup: choose the device, then load the GPT-2 tokenizer and model.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

text_generation_tokenizer = AutoTokenizer.from_pretrained("gpt2")
# No pad token is configured here, so the end-of-sequence token is reused for
# padding (needed because the tokenizer is later called with padding=True).
text_generation_tokenizer.pad_token = text_generation_tokenizer.eos_token

# Load the weights and place them on the device chosen above.
text_generation_model = AutoModelForCausalLM.from_pretrained("gpt2")
text_generation_model = text_generation_model.to(device)
8
+
9
+ # Function to transcribe, translate, generate creative text, and optionally generate an image
10
def _generate_creative_text(english_text):
    """Return a sampled GPT-2 continuation of *english_text*.

    Uses the module-level ``text_generation_tokenizer``,
    ``text_generation_model`` and ``device``.

    Args:
        english_text: Prompt text; may be empty or ``None``.

    Returns:
        The decoded prompt plus continuation, stripped of surrounding
        whitespace. If the prompt is empty or generation fails, a
        human-readable message is returned instead of raising.
    """
    if not english_text:
        return "Please provide text to generate creative content."

    max_new_tokens = 60
    try:
        # Leave room for the continuation: the prompt is truncated so that
        # prompt + new tokens stays within the tokenizer's model_max_length.
        # NOTE(review): assumes model_max_length reflects the model's real
        # context window (true for the stock "gpt2" tokenizer) - confirm if
        # the checkpoint is ever swapped.
        prompt_budget = max(1, text_generation_tokenizer.model_max_length - max_new_tokens)
        inputs = text_generation_tokenizer(
            english_text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=prompt_budget,
        ).to(device)  # inputs must live on the same device as the model

        generated_tokens = text_generation_model.generate(
            **inputs,
            # max_new_tokens bounds only the continuation. The previous
            # max_length=60 also counted the prompt, so any prompt of ~60+
            # tokens left no room to generate.
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            no_repeat_ngram_size=3,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # early_stopping was dropped: it only applies to beam search.
            pad_token_id=text_generation_tokenizer.pad_token_id,
        )
        return text_generation_tokenizer.decode(
            generated_tokens[0], skip_special_tokens=True
        ).strip()
    except Exception as e:
        return f"An error occurred during text generation: {str(e)}"


def process_audio(audio_path, image_option):
    """Transcribe Tamil audio, translate it, and generate text and an image.

    The pipeline runs in four steps: transcription through ``client``,
    Tamil-to-English translation through ``GoogleTranslator``, creative text
    generation with GPT-2, and (optionally) image generation through ``pipe``.
    ``client`` and ``pipe`` are expected to be defined elsewhere in the file.

    Args:
        audio_path: Filesystem path of the uploaded audio, or ``None`` when
            nothing was uploaded.
        image_option: ``"Generate Image"`` to run the image step; any other
            value skips it.

    Returns:
        A 4-tuple ``(tamil_text, translation, image, creative_text)`` matching
        the order of the UI outputs. On failure of an early step the error
        message takes the place of that step's text and the remaining slots
        are ``None``. If only image generation fails, ``image`` is ``None``
        and the error is appended to ``creative_text``.
    """
    if audio_path is None:
        return "Please upload an audio file.", None, None, None

    # Step 1: Transcribe audio
    try:
        with open(audio_path, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), file.read()),
                model="whisper-large-v3",
                language="ta",
                response_format="verbose_json",
            )
        tamil_text = transcription.text
    except Exception as e:
        return f"An error occurred during transcription: {str(e)}", None, None, None

    # Step 2: Translate Tamil to English
    try:
        translator = GoogleTranslator(source='ta', target='en')
        translation = translator.translate(tamil_text)
    except Exception as e:
        return tamil_text, f"An error occurred during translation: {str(e)}", None, None

    # Step 3: Generate creative text
    creative_text = _generate_creative_text(translation)

    # Step 4: Generate image (if selected)
    image = None
    if image_option == "Generate Image":
        try:
            image = pipe(translation).images[0]
        except Exception as e:
            # The third slot feeds an image component, so an error string must
            # not go there. Report the failure alongside the creative text and
            # keep the text that was already generated.
            image_error = f"An error occurred during image generation: {str(e)}"
            return tamil_text, translation, None, f"{creative_text}\n\n{image_error}"

    return tamil_text, translation, image, creative_text
67
+
68
+ # Create Gradio interface
69
# Two-column page: upload controls on the left, pipeline results on the right.
with gr.Blocks() as iface:
    gr.Markdown("# Audio Transcription, Translation, and Image Generation")

    with gr.Row():
        # Left column: audio upload, image toggle and the trigger button.
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            image_option = gr.Dropdown(
                ["Generate Image", "Skip Image"],
                label="Image Generation",
                value="Generate Image",
            )
            submit_button = gr.Button("Process Audio")

        # Right column: one read-only widget per value returned by process_audio.
        with gr.Column():
            tamil_text_output = gr.Textbox(label="Tamil Transcription", interactive=False)
            translation_output = gr.Textbox(label="English Translation", interactive=False)
            image_output = gr.Image(label="Generated Image")
            creative_text_output = gr.Textbox(label="Creative Text", interactive=False)

    # Output order must match the tuple order returned by process_audio.
    handler_inputs = [audio_input, image_option]
    handler_outputs = [
        tamil_text_output,
        translation_output,
        image_output,
        creative_text_output,
    ]
    submit_button.click(
        fn=process_audio,
        inputs=handler_inputs,
        outputs=handler_outputs,
    )

# Start serving the app.
iface.launch()