maliahson committed on
Commit
7db0502
·
verified ·
1 Parent(s): 2866497

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -0
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os

import gradio as gr
import librosa
import torch
from hugchat import hugchat
from hugchat.login import Login
from transformers import pipeline
8
+
9
# --- Transcription model configuration ---
MODEL_NAME = "openai/whisper-large-v3-turbo"
# transformers accepts a CUDA device index (0) or the string "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

# Whisper ASR pipeline; 30-second chunking lets it handle long recordings.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# --- Hugging Face chat credentials ---
# SECURITY: credentials were previously hard-coded here (and therefore leaked
# in version control — rotate them). Read them from the environment instead:
# set HUGCHAT_EMAIL and HUGCHAT_PASSWORD before launching the app.
EMAIL = os.environ.get("HUGCHAT_EMAIL")
PASSWD = os.environ.get("HUGCHAT_PASSWORD")
if not EMAIL or not PASSWD:
    raise RuntimeError(
        "Missing credentials: set the HUGCHAT_EMAIL and HUGCHAT_PASSWORD "
        "environment variables."
    )

# Directory where hugchat persists session cookies between runs.
cookie_path_dir = "./cookies/"
os.makedirs(cookie_path_dir, exist_ok=True)

# Log in once at startup and share a single chatbot session for all requests.
sign = Login(EMAIL, PASSWD)
cookies = sign.login(cookie_dir_path=cookie_path_dir, save_cookies=True)
chatbot = hugchat.ChatBot(cookies=cookies.get_dict())
33
+
34
def transcribe_audio(audio_path):
    """Transcribe a local audio file to Urdu text with the Whisper pipeline.

    Returns the transcription string on success. On any failure, returns a
    human-readable string beginning with "Error processing audio:" (callers
    rely on this sentinel rather than an exception).
    """
    try:
        # Whisper expects 16 kHz mono input; librosa resamples/downmixes.
        waveform, _sample_rate = librosa.load(audio_path, sr=16000, mono=True)
        result = pipe(waveform, batch_size=8, generate_kwargs={"language": "urdu"})
        return result["text"]
    except Exception as exc:
        return f"Error processing audio: {exc}"
44
+
45
def extract_info_from_filename(filename):
    """Parse ``agent_fileNumber_city_country[.ext]`` metadata from a filename.

    Example: ``agent1_2_Multan_Pakistan.wav`` ->
    ``('agent1', 2, 'Multan', 'Pakistan')``. Any parse failure (too few
    underscore-separated parts, non-numeric file number) yields
    ``(None, None, None, None)``.
    """
    try:
        # Unpacking the first four parts raises ValueError when fewer than
        # four are present, which the handler below maps to the None tuple.
        agent, number_text, city, country_raw = filename.split('_')[:4]
        country = country_raw.split('.')[0]  # drop any trailing extension
        return agent, int(number_text), city, country
    except Exception:
        return None, None, None, None
62
+
63
def process_audio(audio_path):
    """End-to-end handler: parse filename metadata, transcribe, query chatbot.

    Parameters
    ----------
    audio_path : str
        Filesystem path of the uploaded recording; its basename must follow
        the ``agentX_N_City_Country.ext`` convention.

    Returns
    -------
    tuple[str, str, str]
        ``(json_text, transcription, filename)`` for the three Gradio
        outputs. On failure the first element is a JSON error object and the
        remaining fields degrade to empty strings.
    """
    # Derive agent / file_number / city / country from the file's name.
    filename = os.path.basename(audio_path)
    agent, file_number, city, country = extract_info_from_filename(filename)

    if agent is None:
        return (
            json.dumps({"error": "Invalid filename format. Use format: agentX_N_City_Country.wav"}),
            "",
            "",
        )

    # transcribe_audio signals failure with an "Error processing audio: ..."
    # string rather than raising, so check for that exact prefix (the old
    # `"Error" in transcription` test could misroute a valid transcription).
    transcription = transcribe_audio(audio_path)
    if transcription.startswith("Error processing audio:"):
        # json.dumps escapes quotes/newlines in the message, so the payload
        # stays valid JSON (the previous f-string interpolation did not).
        return json.dumps({"error": transcription}), transcription, ""

    # Prompt instructs the chatbot to correct, translate, and emit JSON only.
    prompt = f"""
Correct the given Urdu text for grammar, word accuracy, and contextual meaning without adding anything extra.
Then, translate the corrected text into English.

Next, create a JSON file that detects crops and their diseases, following this format:
{{
"records": [
{{
"Recording_name": "(unknown)",
"agent": "{agent}",
"file_number": {file_number},
"city": "{city}",
"country": "{country}",
"crops": [
{{
"name": "<detected_crop>",
"season": "<appropriate_season>",
"harvest_months": ["<months>"],
"regions": ["<regions>"],
"diseases": [
{{
"name": "<disease>",
"description": "<description>",
"wikipedia_link": "<link>"
}}
]
}}
],
"issues": ["<detected_issues>"],
"disease_linking": {{
"<crop_name>": ["<disease_names>"]
}}
}}
]
}}

The Urdu text to process is:
{transcription}

Only provide the JSON output, do not include any additional text.
"""

    # Send to the shared hugchat session and block until the reply is ready.
    response = chatbot.chat(prompt).wait_until_done()
    return response, transcription, filename
125
+
126
# ---------------------------------------------------------------------------
# Gradio UI: upload a recording, run process_audio, display the three outputs.
# ---------------------------------------------------------------------------
with gr.Blocks(title="Audio Transcription and Crop Analysis") as interface:
    gr.Markdown("## Audio Transcription and Crop Disease Analysis")

    with gr.Row():
        audio_input = gr.Audio(
            type="filepath",
            label="Upload Audio File (e.g., agent1_2_Multan_Pakistan.wav)",
        )

    with gr.Row():
        json_output = gr.Textbox(label="JSON Output", interactive=False)
        transcription_output = gr.Textbox(
            label="Transcription (Urdu)", interactive=False
        )
        filename_output = gr.Textbox(
            label="Processed Filename", interactive=False
        )

    process_button = gr.Button("Process Audio")

    # Wire the button to the end-to-end handler; outputs map positionally to
    # the 3-tuple that process_audio returns.
    process_button.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[json_output, transcription_output, filename_output],
    )

if __name__ == "__main__":
    interface.launch()