| import cohere |
| import gradio as gr |
| from pypdf import PdfReader |
| from gtts import gTTS |
| from io import BytesIO |
| import os |
| from loguru import logger |
| import tempfile |
| from dotenv import load_dotenv |
|
|
| |
# Load settings from a .env file (python-dotenv) before the key is looked up.
load_dotenv()

COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

# Fail fast at import time: nothing below can work without a key.
if not COHERE_API_KEY:
    raise ValueError(
        "Cohere API key not found. Please set the COHERE_API_KEY environment variable."
    )

# Single client instance reused for every generation request in this module.
cohere_client = cohere.Client(COHERE_API_KEY)
|
|
| |
# Display label -> language code for each language offered in the UI.
_LANGUAGE_CODES = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Chinese": "zh-CN",
    "Japanese": "ja",
    "Hindi": "hi",
}

# (label, code) pairs, in the order declared above, used as the dropdown choices.
language_options = list(_LANGUAGE_CODES.items())
|
|
| |
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts; it is passed through
            unchanged.

    Returns:
        The text of all pages that yielded any, joined with newlines so the
        last word of one page is not fused with the first word of the next.
        An empty string when no page yields text.
    """
    reader = PdfReader(pdf_file)
    page_texts = (page.extract_text() for page in reader.pages)
    # Pages with nothing extractable (empty or None) are skipped entirely.
    return "\n".join(chunk for chunk in page_texts if chunk)
|
|
| |
def text_to_speech(text, language_code):
    """Synthesise *text* with gTTS and store the result in a temporary MP3.

    Args:
        text: Non-empty string to be spoken.
        language_code: Language code forwarded to gTTS as ``lang``.

    Returns:
        Path of the written ``.mp3`` file, or ``None`` when *text* is not a
        non-empty string or any step of the conversion raises.
    """
    if not (text and isinstance(text, str)):
        logger.error("No valid text available for speech conversion.")
        return None

    try:
        # Render into memory first, so the temp file is only created once
        # synthesis has actually succeeded.
        buffer = BytesIO()
        gTTS(text, lang=language_code).write_to_fp(buffer)
        mp3_bytes = buffer.getvalue()

        # delete=False: the file has to outlive this call, the caller only
        # receives its path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
            mp3_file.write(mp3_bytes)
            saved_path = mp3_file.name
        return saved_path
    except Exception as e:
        logger.error(f"Error during text-to-speech conversion: {e}")
        return None
|
|
| |
def pdf_to_audio(pdf_file, language_code):
    """Turn a PDF into generated text plus a spoken version of that text.

    Steps: extract the PDF text, send it as the prompt to the Cohere
    ``generate`` endpoint, then convert the generated text to speech.

    Args:
        pdf_file: PDF input handed to ``extract_text_from_pdf``.
        language_code: Language code handed to ``text_to_speech``.

    Returns:
        A 2-tuple. On success: ``(generated_text, audio_file_path)``.
        On any failure: ``(error_message, None)``; exceptions are logged and
        never propagate to the caller.
    """
    try:
        pdf_text = extract_text_from_pdf(pdf_file)

        has_text = bool(pdf_text.strip())
        if not has_text:
            logger.error("The PDF contains no extractable text.")
            return "The PDF contains no extractable text. Please try a different file.", None

        # The raw PDF text is used as the prompt as-is.
        completion = cohere_client.generate(
            model='c4ai-aya-23',
            prompt=pdf_text,
            max_tokens=500,
        )

        if not (completion and completion.generations):
            logger.error("Cohere API did not return a valid response.")
            return "Error: Cohere API did not return a valid response.", None

        # Only the first generation is used.
        first_generation = completion.generations[0]
        generated_text = first_generation.text.strip()

        if not generated_text:
            logger.error("Cohere generated an empty response.")
            return "Error: Cohere generated an empty response.", None

        audio_path = text_to_speech(generated_text, language_code)

        if audio_path is not None:
            return generated_text, audio_path
        # text_to_speech has already logged the underlying cause.
        return "Error: Failed to generate speech from the provided text.", None
    except Exception as e:
        logger.error(f"Error during PDF to audio conversion: {e}")
        return "An error occurred while processing the PDF.", None
|
|
| |
def gradio_interface(pdf_file, language_code):
    """Gradio callback: forward the UI inputs to ``pdf_to_audio``.

    Args:
        pdf_file: Value of the file input component.
        language_code: Value of the language dropdown.

    Returns:
        The ``(text, audio_path)`` pair produced by ``pdf_to_audio``, one
        element per output component.
    """
    text_output, audio_output = pdf_to_audio(pdf_file, language_code)
    return text_output, audio_output
|
|
| |
# UI definition: a file upload and a language dropdown in, text and audio out.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=["file", gr.Dropdown(choices=language_options, label="Select Language")],
    outputs=["text", "audio"],
    title="PDF to Audio using Cohere (Multi-language)",
)

# Runs at module level, so the app is launched as soon as this file executes.
demo.launch(debug=True)
|
|