File size: 6,690 Bytes
3599c67 33a1aec 2c34fac 3599c67 33a1aec 2c34fac 07ae5cf 2c34fac 348b585 2c34fac 3599c67 90aaafe 2c34fac 3599c67 b2808b2 3599c67 b2808b2 2c34fac 33a1aec 2c34fac 3599c67 b2808b2 2c34fac 1bd96c2 33a1aec 2c34fac 33a1aec 1bd96c2 7cfae3c 1bd96c2 3599c67 b2808b2 3599c67 6836297 3599c67 33a1aec 3599c67 6836297 3599c67 b2808b2 3599c67 33a1aec 3599c67 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
import gradio as gr
import tempfile
import os
from gtts import gTTS
from deep_translator import GoogleTranslator
from groq import Groq
import logging
from sentence_transformers import SentenceTransformer
import numpy as np
# Log at INFO and above using a "timestamp | level | message" layout.
logging.basicConfig(
    format='%(asctime)s | %(levelname)s | %(message)s',
    level=logging.INFO,
)

# Groq chat client; the API key is read from the GROQ_API_KEY environment variable.
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Sentence-embedding model used both to index text chunks and to embed queries.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# Parallel in-memory stores: indexed_embeddings[i] is the vector for indexed_texts[i].
indexed_texts, indexed_embeddings = [], []
# Display name -> language code handed to the translator as the target language.
# Insertion order is the order shown in the translation dropdown.
translation_languages = dict(
    [
        ("English", "en"),
        ("Arabic", "ar"),
        ("Hindi", "hi"),
        ("Kannada", "kn"),
        ("Marathi", "mr"),
        ("Telugu", "te"),
        ("Tamil", "ta"),
        ("Gujarati", "gu"),
        ("Malayalam", "ml"),
    ]
)
# Display name -> {"code": <language code>} used when generating speech with Google TTS.
# Insertion order is the order shown in the audio-language dropdown.
audio_language_dict = {
    display_name: {"code": lang_code}
    for display_name, lang_code in (
        ("English", "en"),
        ("Arabic", "ar"),
        ("Hindi", "hi"),
        ("Kannada", "kn"),
        ("Marathi", "mr"),
        ("Telugu", "te"),
        ("Tamil", "ta"),
        ("Gujarati", "gu"),
        ("Malayalam", "ml"),
    )
}
def index_text(text: str) -> str:
    """Split *text* into chunks, embed them, and append them to the in-memory index.

    The text is split on ``'. '``; each piece is stripped and blank pieces are
    dropped. All chunks are embedded with a single ``sentence_model.encode``
    call and only appended afterwards, so a failed encode leaves the index
    exactly as it was.

    Args:
        text: Text to index. ``None`` or blank text is treated as nothing to index.

    Returns:
        A status message: a success message with the total number of indexed
        chunks, ``"No text to index."`` when there is nothing to add, or an
        ``"Error indexing text: ..."`` message if anything fails.
    """
    try:
        # Strip every piece so whitespace-only fragments never reach the index.
        pieces = (piece.strip() for piece in (text or "").split('. '))
        chunks = [piece for piece in pieces if piece]
        if not chunks:
            return "No text to index."

        # One batched encode call instead of one model call per chunk.
        embeddings = sentence_model.encode(chunks)

        # The lists are mutated in place, keeping texts and vectors aligned by position.
        indexed_texts.extend(chunks)
        indexed_embeddings.extend(embeddings)
        return f"Text indexed successfully. Total indexed chunks: {len(indexed_texts)}"
    except Exception as e:
        logging.exception("Error indexing text: %s", e)
        return f"Error indexing text: {str(e)}"
def clear_index() -> str:
    """Empty both in-memory index lists in place and return a confirmation message."""
    # Slice deletion empties the existing list objects rather than rebinding the names.
    for store in (indexed_texts, indexed_embeddings):
        del store[:]
    return "Index cleared successfully. Ready for new indexing."
def find_most_similar(query: str, top_k: int = 3) -> list:
    """Return up to *top_k* indexed chunks ranked by dot product with *query*.

    Args:
        query: Text to embed and compare against every indexed chunk.
        top_k: Maximum number of chunks to return. Values <= 0 yield an empty list.

    Returns:
        The best-matching chunks, highest score first. When nothing has been
        indexed, a one-element list holding ``"No indexed text available."``.
    """
    # A bare `[-top_k:]` slice with top_k == 0 would select every chunk, so
    # non-positive requests are answered explicitly.
    if top_k <= 0:
        return []
    if not indexed_texts:
        return ["No indexed text available."]

    query_embedding = sentence_model.encode([query])[0]
    # Score all chunks with one matrix-vector product instead of a Python loop.
    scores = np.asarray(indexed_embeddings) @ query_embedding
    # argsort is ascending; reverse it so the highest scores come first.
    top_indices = np.argsort(scores)[::-1][:top_k]
    return [indexed_texts[i] for i in top_indices]
def chat_with_context(question: str, model: str) -> str:
    """Answer *question* with a Groq chat model, using indexed text as context.

    The three indexed chunks most similar to the question are joined with
    spaces and sent to the model together with the question in a single
    user message.

    Args:
        question: The user's question. Blank or ``None`` is rejected without
            calling the API.
        model: Model name passed to the Groq chat completion call.

    Returns:
        The model's answer, a prompt asking the user to enter a question or to
        index text first, or an ``"Error in chat: ..."`` message on failure.
    """
    if not question or not question.strip():
        return "Please enter a question."
    if not indexed_texts:
        return "Please index some text first."

    try:
        # Retrieval sits inside the try so embedding failures are reported
        # the same way as API failures.
        relevant_contexts = find_most_similar(question, top_k=3)
        context = " ".join(relevant_contexts)
        prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model=model,
            max_tokens=500,  # Limit the response length
        )
        # Fall back to an empty string so the function always returns a str.
        return chat_completion.choices[0].message.content or ""
    except Exception as e:
        logging.exception("Error in chat: %s", e)
        return f"Error in chat: {str(e)}"
# Translation function
def translate_text(text: str, target_lang_code: str) -> str:
    """Translate *text* into *target_lang_code* with source-language auto-detection.

    Args:
        text: Text to translate. ``None`` or blank text is returned without
            contacting the translation service (``None`` becomes ``""``).
        target_lang_code: Target language code passed to ``GoogleTranslator``.

    Returns:
        The translated text, or a ``"Translation Error: ..."`` message on failure.
    """
    try:
        # Nothing to translate: skip the network round trip entirely.
        if text is None or not text.strip():
            return text or ""
        translator = GoogleTranslator(source='auto', target=target_lang_code)
        translated = translator.translate(text)
        # Guard against a None result so callers always receive a string.
        return translated if translated is not None else ""
    except Exception as e:
        logging.exception("Translation Error: %s", e)
        return f"Translation Error: {str(e)}"
# Google TTS function
def google_tts(text, lang):
    """Synthesize *text* to an MP3 file with Google TTS.

    Args:
        text: Text to speak. ``None`` or blank text is rejected up front.
        lang: Language code passed to ``gTTS``.

    Returns:
        A ``(path, message)`` tuple. ``path`` is the generated MP3 file, or
        ``None`` on failure, in which case ``message`` describes the error.
    """
    if text is None or not str(text).strip():
        return None, "Error in Google TTS: No text to speak"

    audio_path = None
    try:
        tts = gTTS(text=text, lang=lang, slow=False)
        # Reserve a unique file name, then close the handle before gTTS writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            audio_path = temp_audio.name
        tts.save(audio_path)
        return audio_path, f"Speech generated with Google TTS using {lang} language"
    except Exception as e:
        # delete=False means nothing else removes the file; do not leak it on failure.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None, f"Error in Google TTS: {str(e)}"
# Build the Gradio UI: components are created first, then the event handlers
# are defined and wired to them inside the same Blocks context.
# NOTE(review): the nesting under each `with` was reconstructed from a source
# whose indentation had been stripped - confirm which components belong to
# each row/group.
with gr.Blocks() as iface:
    gr.Markdown("# Free Text-to-Speech Tool with Language Translation and Chat")

    # Source text entry.
    with gr.Row():
        text_input = gr.Textbox(label="Enter text for translation and speech generation", lines=3)

    # Target-language selection and the button that triggers translation.
    with gr.Row():
        translation_lang_dropdown = gr.Dropdown(list(translation_languages.keys()), label="Select Translation Language", value="English")
        convert_button = gr.Button("Convert")

    # Translation result; also the input for indexing and for speech generation.
    translated_text = gr.Textbox(label="Translated Text")

    # Index management for the chat feature.
    with gr.Row():
        index_button = gr.Button("Index")
        clear_index_button = gr.Button("Clear Index")

    index_status = gr.Textbox(label="Indexing Status")

    # When checked, the chat answer (if any) is spoken instead of the translated text.
    use_chat = gr.Checkbox(label="Use Chat for TTS input", value=False)

    # Chat controls start hidden and are toggled by the checkbox above.
    chat_group = gr.Group(visible=False)
    with chat_group:
        chat_input = gr.Textbox(label="Ask a question about the indexed text")
        chat_model = gr.Dropdown(
            choices=["llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
            label="Select Chat Model",
            value="llama3-70b-8192"
        )
        chat_button = gr.Button("Ask")
        chat_output = gr.Textbox(label="Answer", interactive=False)

    # Speech-generation controls.
    with gr.Group() as tts_options:
        audio_lang_dropdown = gr.Dropdown(list(audio_language_dict.keys()), label="Select Audio Language", value="English")
        generate_button = gr.Button("Generate Speech")

    audio_output = gr.Audio(label="Generated Speech")
    message_output = gr.Textbox(label="Message")

    def update_chat_visibility(use_chat):
        """Return an update that shows the chat group when the checkbox is ticked."""
        return gr.update(visible=use_chat)

    def convert_text(text, translation_lang):
        """Translate *text* into the language chosen in the translation dropdown."""
        # Map the dropdown's display name to its language code.
        target_code = translation_languages[translation_lang]
        translated = translate_text(text, target_code)
        return translated

    def generate_speech(text, audio_lang, use_chat, chat_output):
        """Generate speech for *text*, or for the chat answer when chat mode is on.

        Returns the ``(audio path, message)`` pair produced by ``google_tts``, or
        ``(None, error message)`` if anything raises here.
        """
        # Prefer the chat answer only when chat mode is enabled and an answer exists.
        if use_chat and chat_output:
            text = chat_output
        logging.info(f"Generating speech: lang={audio_lang}")
        try:
            return google_tts(text, audio_language_dict[audio_lang]["code"])
        except Exception as e:
            logging.error(f"Error generating speech: {str(e)}")
            return None, f"Error generating speech: {str(e)}"

    # Event wiring.
    convert_button.click(convert_text, inputs=[text_input, translation_lang_dropdown], outputs=translated_text)
    # Indexing works on the translated text, not the raw input.
    index_button.click(index_text, inputs=[translated_text], outputs=[index_status])
    clear_index_button.click(clear_index, outputs=[index_status])
    use_chat.change(update_chat_visibility, inputs=[use_chat], outputs=[chat_group])
    chat_button.click(chat_with_context, inputs=[chat_input, chat_model], outputs=[chat_output])
    generate_button.click(
        generate_speech,
        inputs=[translated_text, audio_lang_dropdown, use_chat, chat_output],
        outputs=[audio_output, message_output]
    )
# Start the Gradio server for the interface defined above.
iface.launch()