import json
import logging
import os

import gradio as gr
import librosa
import torch
from hugchat import hugchat
from hugchat.login import Login
from transformers import pipeline
| |
|
| | |
# Module-level logging; using __name__ keeps records attributable to this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Whisper checkpoint used for Urdu speech recognition.
MODEL_NAME = "openai/whisper-large-v3-turbo"
# transformers pipelines accept a CUDA device index (int) or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

# Shared ASR pipeline for the whole app. chunk_length_s=30 makes the pipeline
# split long recordings into 30-second windows so arbitrarily long audio can
# be transcribed in one call.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
| |
|
| | |
# HuggingChat credentials MUST come from the environment.
# SECURITY FIX: the previous version shipped a real e-mail address and
# password as hard-coded fallback defaults — never commit credentials.
EMAIL = os.getenv("EMAIL")
PASSWD = os.getenv("PASSWD")

# Directory where hugchat persists login cookies between runs.
cookie_path_dir = "./cookies/"
os.makedirs(cookie_path_dir, exist_ok=True)

# Best-effort chatbot initialization: when login is impossible the app
# degrades to transcription-only output (see process_audio), so failures
# here are logged rather than raised.
chatbot = None
if not EMAIL or not PASSWD:
    logger.warning("EMAIL/PASSWD not set; chatbot features are disabled")
else:
    try:
        sign = Login(EMAIL, PASSWD)
        cookies = sign.login(cookie_dir_path=cookie_path_dir, save_cookies=True)
        chatbot = hugchat.ChatBot(cookies=cookies.get_dict())
        logger.info("Chatbot initialized successfully")
    except Exception as e:
        logger.error(f"Failed to initialize chatbot: {e}")
| |
|
def transcribe_audio(audio_path):
    """Transcribe a local audio file to Urdu text with the Whisper pipeline.

    Returns the transcription string on success, or an
    ``"Error processing audio: ..."`` string on failure — callers detect
    failure by substring match on "Error".
    """
    try:
        if not os.path.exists(audio_path):
            raise FileNotFoundError("Audio file not found")
        # Whisper expects 16 kHz mono input; resample while loading.
        waveform, _sample_rate = librosa.load(audio_path, sr=16000, mono=True)
        result = pipe(waveform, batch_size=8, generate_kwargs={"language": "urdu"})
        return result["text"]
    except Exception as e:
        return f"Error processing audio: {str(e)}"
| |
|
def extract_info_from_filename(filename):
    """Parse ``agentX_N_City_Country.ext`` into (agent, file_number, city, country).

    Returns ``(None, None, None, None)`` when the name has fewer than four
    underscore-separated parts or the second part is not an integer.
    """
    try:
        pieces = filename.split('_')
        if len(pieces) < 4:
            raise ValueError("Filename must have at least 4 parts: agentX_N_City_Country")
        agent, number_text, city = pieces[0], pieces[1], pieces[2]
        # Strip the extension (and anything after the first dot) off the country.
        country = pieces[3].split('.')[0]
        return agent, int(number_text), city, country
    except (ValueError, IndexError):
        return None, None, None, None
| |
|
def process_audio(audio_path):
    """Process an uploaded audio file end to end.

    Steps: extract metadata from the filename, transcribe the audio, then ask
    the HuggingChat model to correct/translate the Urdu text and emit a
    crop/disease JSON report.

    Returns a 3-tuple ``(json_text, transcription, filename)`` matching the
    three Gradio outputs; error conditions are reported as JSON objects with
    an "error" key rather than raised.
    """
    if not audio_path:
        return json.dumps({"error": "No audio file provided"}), "", ""

    filename = os.path.basename(audio_path)
    agent, file_number, city, country = extract_info_from_filename(filename)

    if agent is None:
        return (
            json.dumps({"error": "Invalid filename format. Use format: agentX_N_City_Country.wav"}),
            "",
            filename,
        )

    transcription = transcribe_audio(audio_path)
    if "Error" in transcription:
        # BUG FIX: json.dumps escapes quotes/newlines in the error text; the
        # previous raw f-string interpolation produced invalid JSON whenever
        # the message contained a quote or brace.
        return json.dumps({"error": transcription}), transcription, filename

    def _fallback_json(extra=None):
        # Transcription-only record used when the chatbot step is unavailable
        # or fails; built with json.dumps so arbitrary transcription text
        # (quotes, braces, newlines) still yields valid JSON.
        record = {
            "Recording_name": "(unknown)",
            "agent": agent,
            "file_number": file_number,
            "city": city,
            "country": country,
            "transcription": transcription,
        }
        if extra:
            record.update(extra)
        return json.dumps({"records": [record]}, ensure_ascii=False)

    if chatbot is None:
        logger.warning("Chatbot unavailable, returning transcription-only JSON")
        return _fallback_json(), transcription, filename

    # Prompt asks the model for correction + translation + a structured
    # crop/disease report; doubled braces are literal JSON braces.
    prompt = f"""
Correct the given Urdu text for grammar, word accuracy, and contextual meaning without adding anything extra.
Then, translate the corrected text into English.
Next, create a JSON file that detects crops and their diseases, following this format:
{{
  "records": [
    {{
      "Recording_name": "(unknown)",
      "agent": "{agent}",
      "file_number": {file_number},
      "city": "{city}",
      "country": "{country}",
      "crops": [
        {{
          "name": "<detected_crop>",
          "season": "<appropriate_season>",
          "harvest_months": ["<months>"],
          "regions": ["<regions>"],
          "diseases": [
            {{
              "name": "<disease>",
              "description": "<description>",
              "wikipedia_link": "<link>"
            }}
          ]
        }}
      ],
      "issues": ["<detected_issues>"],
      "disease_linking": {{
        "<crop_name>": ["<disease_names>"]
      }}
    }}
  ]
}}
The Urdu text to process is:
{transcription}
Only provide the JSON output, do not include any additional text.
"""

    try:
        response = chatbot.chat(prompt).wait_until_done()
        return response, transcription, filename
    except Exception as e:
        logger.error(f"Chatbot processing failed: {e}")
        return (
            _fallback_json({"error": f"Chatbot processing failed: {str(e)}"}),
            transcription,
            filename,
        )
| |
|
| | |
# --- Gradio UI -------------------------------------------------------------
# Widget variables are module-level; `interface` is launched from the
# __main__ guard. The click handler is wired inside the Blocks context,
# as Gradio requires.
with gr.Blocks(title="Audio Transcription and Crop Analysis") as interface:
    gr.Markdown("## Audio Transcription and Crop Disease Analysis")

    with gr.Row():
        # type="filepath" hands process_audio a path on disk, not raw samples.
        audio_input = gr.Audio(type="filepath", label="Upload Audio File (e.g., agent1_2_Multan_Pakistan.wav)")

    with gr.Row():
        json_output = gr.Textbox(label="JSON Output", interactive=False, lines=10)
        transcription_output = gr.Textbox(label="Transcription (Urdu)", interactive=False, lines=5)
        filename_output = gr.Textbox(label="Processed Filename", interactive=False)

    process_button = gr.Button("Process Audio")

    # Outputs map 1:1 onto process_audio's (json, transcription, filename) tuple.
    process_button.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[json_output, transcription_output, filename_output],
    )
| |
|
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable from outside a container.
    interface.launch(server_name="0.0.0.0", server_port=7860)