Nrnaidu committed on
Commit
43a9bb7
·
verified ·
1 Parent(s): 32c1255

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py CHANGED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import requests
4
+ import gradio as gr
5
+ from groq import Groq
6
+ from transformers import MarianMTModel, MarianTokenizer, AutoModelForCausalLM, AutoTokenizer
7
+ from deep_translator import GoogleTranslator
8
+ from PIL import Image, ImageDraw
9
+ import joblib
10
+ import time
11
+ import torch
12
+ import warnings
13
+ from huggingface_hub import InferenceApi
14
+ from diffusers import StableDiffusionPipeline
15
# ---------------------------------------------------------------------------
# One-time model / client setup (runs at import time).
# ---------------------------------------------------------------------------

# Pick the compute device once; the models below are moved onto it.
device = "cuda" if torch.cuda.is_available() else "cpu"

# GPT-2 model and tokenizer used for the "creative text" step.
text_generation_model = AutoModelForCausalLM.from_pretrained("gpt2").to(device)
text_generation_tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Reuse EOS as the padding token so the tokenizer call with padding=True in
# process_audio has a pad token to work with.
text_generation_tokenizer.pad_token = text_generation_tokenizer.eos_token

# Groq client used by process_audio for Whisper transcription. Without this
# definition every request fails with "name 'client' is not defined".
# The API key is taken from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Stable Diffusion pipeline used by process_audio for image generation.
# Without this definition the image step fails with "name 'pipe' is not
# defined".
# NOTE(review): the original file never names a checkpoint; the default below
# is an assumption -- override it with the SD_MODEL_ID environment variable or
# replace it with the checkpoint you actually intend to serve.
pipe = StableDiffusionPipeline.from_pretrained(
    os.environ.get("SD_MODEL_ID", "CompVis/stable-diffusion-v1-4")
).to(device)
22
+
23
def _generate_creative_text(english_text):
    """Continue *english_text* with the module-level GPT-2 model.

    Returns the decoded text (prompt plus continuation) with surrounding
    whitespace stripped. Failures are reported as a message string rather
    than raised, so the caller can display them directly.
    """
    if not english_text:
        return "Please provide text to generate creative content."

    try:
        # Tokenize and move the tensors to the same device as the model.
        inputs = text_generation_tokenizer(
            english_text, return_tensors="pt", padding=True, truncation=True
        ).to(device)
        generated_tokens = text_generation_model.generate(
            **inputs,
            # max_new_tokens (not max_length): max_length also counts the
            # prompt, so a translation of 60+ tokens left no room to generate.
            max_new_tokens=60,
            num_return_sequences=1,
            no_repeat_ngram_size=3,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            # early_stopping was dropped: it is a beam-search option and this
            # call samples without beams.
            pad_token_id=text_generation_tokenizer.eos_token_id,
        )
        return text_generation_tokenizer.decode(
            generated_tokens[0], skip_special_tokens=True
        ).strip()
    except Exception as e:
        return f"An error occurred during text generation: {e}"


def process_audio(audio_path, image_option):
    """Transcribe Tamil audio, translate it, and optionally generate an image.

    Expects module-level ``client`` (used for ``client.audio.transcriptions``)
    and ``pipe`` (called with the translated text) objects to exist, along
    with the text-generation model, tokenizer and ``device``.

    Args:
        audio_path: Path to the uploaded audio file, or None if nothing was
            uploaded.
        image_option: "Generate Image" to run the image pipeline; any other
            value skips image generation.

    Returns:
        A 4-tuple ``(tamil_text, translation, image, creative_text)`` matching
        the four output widgets. On failure, the error message is placed in a
        text slot and the remaining slots are None; the image slot only ever
        holds an image or None.
    """
    if audio_path is None:
        return "Please upload an audio file.", None, None, None

    # Step 1: Transcribe audio
    try:
        with open(audio_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), audio_file.read()),
                model="whisper-large-v3",
                language="ta",
                response_format="verbose_json",
            )
        tamil_text = transcription.text
    except Exception as e:
        return f"An error occurred during transcription: {e}", None, None, None

    # Step 2: Translate Tamil to English
    try:
        translator = GoogleTranslator(source='ta', target='en')
        translation = translator.translate(tamil_text)
    except Exception as e:
        return tamil_text, f"An error occurred during translation: {e}", None, None

    # Step 3: Generate creative text
    creative_text = _generate_creative_text(translation)

    # Step 4: Generate image (if selected)
    image = None
    if image_option == "Generate Image":
        try:
            image = pipe(translation).images[0]
        except Exception as e:
            # Report the failure in a text slot: the third slot feeds an image
            # widget and must not receive a message string. The creative text
            # already produced is kept instead of being discarded.
            image_error = f"An error occurred during image generation: {e}"
            return tamil_text, translation, None, f"{creative_text}\n\n{image_error}"

    return tamil_text, translation, image, creative_text
81
+
82
# Gradio UI: inputs in the left column, the four result widgets in the right.
with gr.Blocks() as iface:
    gr.Markdown(value="# Audio Transcription, Translation, and Image Generation")

    with gr.Row():
        # Left column: audio upload, image on/off choice, and the run button.
        with gr.Column():
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            image_option = gr.Dropdown(
                choices=["Generate Image", "Skip Image"],
                value="Generate Image",
                label="Image Generation",
            )
            submit_button = gr.Button(value="Process Audio")

        # Right column: read-only results, in the order process_audio returns them.
        with gr.Column():
            tamil_text_output = gr.Textbox(interactive=False, label="Tamil Transcription")
            translation_output = gr.Textbox(interactive=False, label="English Translation")
            image_output = gr.Image(label="Generated Image")
            creative_text_output = gr.Textbox(interactive=False, label="Creative Text")

    # Wire the button: two inputs in, four outputs back.
    submit_button.click(
        process_audio,
        [audio_input, image_option],
        [tamil_text_output, translation_output, image_output, creative_text_output],
    )

# Start serving the interface.
iface.launch()