# Source: Hugging Face Space maliahson/F_A_S (status: Sleeping; file size: 6,206 bytes)

import json
import logging
import os

import gradio as gr
import librosa
import torch
from hugchat import hugchat
from hugchat.login import Login
from transformers import pipeline

# Configure logging at INFO level and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Whisper checkpoint used for transcription
MODEL_NAME = "openai/whisper-large-v3-turbo"

# Run on CUDA device index 0 when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Build the speech-recognition pipeline once at import time;
# chunk_length_s=30 requests 30-second chunking of the input audio
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)

# Hugging Face Chat credentials are read exclusively from the environment
# (e.g. configured as Space secrets). They must never be hard-coded here:
# any literal fallback is published together with this file.
# NOTE(review): credentials that were ever committed to this file should be
# treated as compromised and rotated.
EMAIL = os.getenv("EMAIL")
PASSWD = os.getenv("PASSWD")

# Directory to save cookies
cookie_path_dir = "./cookies/"
os.makedirs(cookie_path_dir, exist_ok=True)

# Initialize chatbot with error handling.
# `chatbot` stays None when credentials are missing or login fails, so the
# rest of the app can detect that and degrade to transcription-only output.
chatbot = None
if not EMAIL or not PASSWD:
    logger.warning(
        "EMAIL/PASSWD environment variables are not set; chatbot disabled"
    )
else:
    try:
        sign = Login(EMAIL, PASSWD)
        cookies = sign.login(cookie_dir_path=cookie_path_dir, save_cookies=True)
        chatbot = hugchat.ChatBot(cookies=cookies.get_dict())
        logger.info("Chatbot initialized successfully")
    except Exception as e:
        # Deliberately best-effort: a login failure must not prevent the
        # app from starting.
        logger.error(f"Failed to initialize chatbot: {e}")

def transcribe_audio(audio_path):
    """Return the Whisper transcription of the audio file at *audio_path*.

    The file is loaded as 16 kHz mono audio and passed to the module-level
    ``pipe`` with the language set to Urdu.

    This function does not raise: any failure (including a missing file) is
    reported by returning a string of the form
    ``"Error processing audio: <reason>"``.
    """
    try:
        if os.path.exists(audio_path):
            waveform, _sample_rate = librosa.load(audio_path, sr=16000, mono=True)
            result = pipe(
                waveform,
                batch_size=8,
                generate_kwargs={"language": "urdu"},
            )
            return result["text"]
        raise FileNotFoundError("Audio file not found")
    except Exception as err:
        # Failures are surfaced to the caller as text rather than exceptions.
        return "Error processing audio: " + str(err)

def extract_info_from_filename(filename):
    """Parse ``agentX_N_City_Country[.ext]`` into its four components.

    The name is split on underscores; the first four fields are used and any
    further fields are ignored. The second field must parse as an integer,
    and everything from the first dot onward is dropped from the fourth.

    Returns:
        ``(agent, file_number, city, country)`` on success, or
        ``(None, None, None, None)`` when there are fewer than four fields
        or the file number is not an integer.
    """
    invalid = (None, None, None, None)

    fields = filename.split('_')
    if len(fields) < 4:
        return invalid

    agent, number_text, city, country_field = fields[:4]
    try:
        file_number = int(number_text)
    except ValueError:
        return invalid

    # Keep only the text before the first dot (drops a file extension).
    country = country_field.partition('.')[0]
    return agent, file_number, city, country

# Prefix of the string transcribe_audio() returns when it fails.
_TRANSCRIPTION_ERROR_PREFIX = "Error processing audio:"


def _records_json(filename, agent, file_number, city, country, transcription, error=None):
    """Serialize a single fallback record as a JSON string.

    ``json.dumps`` is used so that quotes, backslashes and newlines in the
    transcription or error text are escaped and the result is always valid
    JSON. ``ensure_ascii=False`` keeps Urdu text readable in the output.
    """
    record = {
        "Recording_name": filename,
        "agent": agent,
        "file_number": file_number,
        "city": city,
        "country": country,
        "transcription": transcription,
    }
    if error is not None:
        record["error"] = error
    return json.dumps({"records": [record]}, ensure_ascii=False)


def _build_prompt(filename, agent, file_number, city, country, transcription):
    """Build the chatbot prompt embedding the filename metadata and transcript."""
    return f"""
    Correct the given Urdu text for grammar, word accuracy, and contextual meaning without adding anything extra. 
    Then, translate the corrected text into English.
    Next, create a JSON file that detects crops and their diseases, following this format:
    {{
      "records": [
        {{
          "Recording_name": "{filename}",
          "agent": "{agent}",
          "file_number": {file_number},
          "city": "{city}",
          "country": "{country}",
          "crops": [
            {{
              "name": "<detected_crop>",
              "season": "<appropriate_season>",
              "harvest_months": ["<months>"],
              "regions": ["<regions>"],
              "diseases": [
                {{
                  "name": "<disease>",
                  "description": "<description>",
                  "wikipedia_link": "<link>"
                }}
              ]
            }}
          ],
          "issues": ["<detected_issues>"],
          "disease_linking": {{
            "<crop_name>": ["<disease_names>"]
          }}
        }}
      ]
    }}
    The Urdu text to process is:
    {transcription}
    Only provide the JSON output, do not include any additional text.
    """


def process_audio(audio_path):
    """Process audio: extract info from filename, transcribe, and generate JSON.

    Args:
        audio_path: Path of the uploaded audio file; may be empty or None
            when nothing was uploaded.

    Returns:
        A ``(json_text, transcription, filename)`` tuple of strings.
        ``json_text`` is one of:

        * ``{"error": ...}`` when no file was given, the filename does not
          match ``agentX_N_City_Country``, or transcription failed;
        * a transcription-only ``{"records": [...]}`` document when the
          chatbot is unavailable or raises (the latter adds an ``error``
          field to the record);
        * otherwise the chatbot's response text, returned unmodified.
    """
    if not audio_path:
        return json.dumps({"error": "No audio file provided"}), "", ""

    # Extract filename and info
    filename = os.path.basename(audio_path)
    agent, file_number, city, country = extract_info_from_filename(filename)

    if agent is None:
        return (
            json.dumps({"error": "Invalid filename format. Use format: agentX_N_City_Country.wav"}),
            "",
            filename,
        )

    # Transcribe audio. Failure is signalled by a string with a fixed prefix;
    # match that prefix exactly so a transcript that merely contains the word
    # "Error" is not mistaken for a failure.
    transcription = transcribe_audio(audio_path)
    if transcription.startswith(_TRANSCRIPTION_ERROR_PREFIX):
        return (
            json.dumps({"error": transcription}, ensure_ascii=False),
            transcription,
            filename,
        )

    # Fallback JSON if chatbot is not initialized
    if chatbot is None:
        logger.warning("Chatbot unavailable, returning transcription-only JSON")
        return (
            _records_json(filename, agent, file_number, city, country, transcription),
            transcription,
            filename,
        )

    prompt = _build_prompt(filename, agent, file_number, city, country, transcription)

    # Process with chatbot and return its output; on failure fall back to a
    # transcription-only record that carries the error message.
    try:
        response = chatbot.chat(prompt).wait_until_done()
    except Exception as e:
        logger.error(f"Chatbot processing failed: {e}")
        return (
            _records_json(
                filename, agent, file_number, city, country, transcription,
                error=f"Chatbot processing failed: {e}",
            ),
            transcription,
            filename,
        )
    return response, transcription, filename

# Gradio Interface
# One audio upload, three read-only text boxes, and a button that runs
# process_audio on the uploaded file. Statement order inside the `with`
# blocks is significant: it is the order in which the components are created.
with gr.Blocks(title="Audio Transcription and Crop Analysis") as interface:
    gr.Markdown("## Audio Transcription and Crop Disease Analysis")
    
    with gr.Row():
        # type="filepath": process_audio receives the upload as a path and
        # derives the agent/city/country metadata from its basename.
        audio_input = gr.Audio(type="filepath", label="Upload Audio File (e.g., agent1_2_Multan_Pakistan.wav)")
    
    with gr.Row():
        # Display-only outputs (interactive=False).
        json_output = gr.Textbox(label="JSON Output", interactive=False, lines=10)
        transcription_output = gr.Textbox(label="Transcription (Urdu)", interactive=False, lines=5)
        filename_output = gr.Textbox(label="Processed Filename", interactive=False)
    
    process_button = gr.Button("Process Audio")

    # The outputs list is in the same order as the tuple process_audio
    # returns: (json text, transcription, filename).
    process_button.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[json_output, transcription_output, filename_output],
    )

# Start the server only when executed as a script, not when imported.
# NOTE(review): "0.0.0.0" presumably exposes the app on all network
# interfaces — confirm this is intended outside a container.
if __name__ == "__main__":
    interface.launch(server_name="0.0.0.0", server_port=7860)