ChakriYamasani committed on
Commit
fd77b84
·
verified ·
1 Parent(s): cbf7ae2

Update helpers.py

Browse files
Files changed (1) hide show
  1. helpers.py +58 -55
helpers.py CHANGED
@@ -7,7 +7,8 @@ from typing import List, Dict, Optional
7
  import re
8
  import bcrypt
9
  import yaml
10
- from googletrans import Translator
 
11
 
12
  # Data storage functions
13
  def load_entries() -> List[Dict]:
@@ -27,7 +28,7 @@ def save_entry(entry: Dict) -> bool:
27
  try:
28
  entries = load_entries()
29
  entries.append(entry)
30
-
31
  os.makedirs("data_entries", exist_ok=True)
32
  with open("data_entries/entries.json", "w", encoding="utf-8") as f:
33
  json.dump(entries, f, indent=2, ensure_ascii=False)
@@ -68,11 +69,7 @@ def get_languages() -> List[str]:
68
  "Kannada",
69
  "Malayalam",
70
  "Oriya",
71
- "Punjabi",
72
- "Assamese",
73
- "Nepali",
74
- "Sanskrit",
75
- "Other"
76
  ]
77
 
78
  # Text-to-Speech functionality
@@ -81,8 +78,9 @@ def text_to_speech(text: str, language: str = "en") -> None:
81
  try:
82
  from gtts import gTTS
83
  import tempfile
84
- import pygame
85
-
 
86
  # Map language names to gTTS language codes
87
  lang_map = {
88
  "English": "en",
@@ -96,28 +94,30 @@ def text_to_speech(text: str, language: str = "en") -> None:
96
  "Kannada": "kn",
97
  "Malayalam": "ml",
98
  "Punjabi": "pa",
99
- "Sanskrit": "sa"
 
 
 
100
  }
101
-
102
  lang_code = lang_map.get(language, "en")
103
-
104
  # Generate TTS
105
  tts = gTTS(text=text, lang=lang_code, slow=False)
106
-
107
- # Save to temporary file
108
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
109
  tts.save(tmp_file.name)
110
-
111
- # Play audio using Streamlit
112
  with open(tmp_file.name, "rb") as audio_file:
113
  audio_bytes = audio_file.read()
114
  st.audio(audio_bytes, format="audio/mp3")
115
-
116
  # Clean up
117
  os.unlink(tmp_file.name)
118
-
119
  except ImportError:
120
- st.error("Text-to-speech library not available. Please install gtts.")
121
  except Exception as e:
122
  st.error(f"Error in text-to-speech: {str(e)}")
123
 
@@ -127,7 +127,7 @@ def speech_to_text(language: str = "en") -> Optional[str]:
127
  try:
128
  import speech_recognition as sr
129
 
130
- # Map language names to speech recognition language codes
131
  lang_map = {
132
  "English": "en-IN",
133
  "Hindi": "hi-IN",
@@ -139,46 +139,52 @@ def speech_to_text(language: str = "en") -> Optional[str]:
139
  "Urdu": "ur-IN",
140
  "Kannada": "kn-IN",
141
  "Malayalam": "ml-IN",
142
- "Punjabi": "pa-IN"
 
143
  }
144
-
145
  lang_code = lang_map.get(language, "en-IN")
146
-
147
  # Initialize recognizer
148
  r = sr.Recognizer()
149
-
150
- # Use microphone as source
 
 
 
151
  with sr.Microphone() as source:
152
  # Adjust for ambient noise
153
  r.adjust_for_ambient_noise(source)
154
-
155
  # Listen for audio
 
156
  audio = r.listen(source, timeout=5, phrase_time_limit=10)
157
-
158
  # Recognize speech
159
  text = r.recognize_google(audio, language=lang_code)
160
  return text
161
-
162
  except ImportError:
163
- st.error("Speech recognition library not available. Please install SpeechRecognition.")
164
  return None
165
  except sr.UnknownValueError:
166
- st.error("Could not understand audio. Please try again.")
167
  return None
168
  except sr.RequestError as e:
169
- st.error(f"Speech recognition service error: {str(e)}")
170
  return None
171
  except Exception as e:
172
- st.error(f"Error in speech recognition: {str(e)}")
173
  return None
174
 
175
  # Geocoding functionality
176
  def geocode_location(location_name: str) -> Optional[tuple]:
177
- """Get coordinates for a location name."""
178
  try:
179
  from geopy.geocoders import Nominatim
180
 
181
- geolocator = Nominatim(user_agent="ancestral_archive")
 
182
  location = geolocator.geocode(location_name)
183
 
184
  if location:
@@ -186,7 +192,7 @@ def geocode_location(location_name: str) -> Optional[tuple]:
186
  return None
187
 
188
  except ImportError:
189
- st.error("Geocoding library not available. Please install geopy.")
190
  return None
191
  except Exception as e:
192
  st.error(f"Error in geocoding: {str(e)}")
@@ -194,8 +200,8 @@ def geocode_location(location_name: str) -> Optional[tuple]:
194
 
195
  # Search functionality
196
  def search_entries(entries: List[Dict], query: str, language: str = None,
197
- category: str = None, has_media: bool = False,
198
- has_location: bool = False) -> List[Dict]:
199
  """Search entries based on query and filters."""
200
  results = []
201
  query_lower = query.lower()
@@ -227,7 +233,7 @@ def search_entries(entries: List[Dict], query: str, language: str = None,
227
 
228
  # Export functionality
229
  def export_to_jsonl(entries: List[Dict], include_media: bool = True,
230
- include_coordinates: bool = True) -> str:
231
  """Export entries to JSONL format."""
232
  lines = []
233
 
@@ -256,7 +262,7 @@ def export_to_jsonl(entries: List[Dict], include_media: bool = True,
256
  return '\n'.join(lines)
257
 
258
  def export_to_csv(entries: List[Dict], include_media: bool = True,
259
- include_coordinates: bool = True) -> str:
260
  """Export entries to CSV format."""
261
  data = []
262
 
@@ -420,13 +426,11 @@ def update_user_entry_count(username: str):
420
  user_data["users"][username]["entries_submitted"] += 1
421
  save_user_data(user_data)
422
 
423
- # Translation functions
424
  def translate_text(text: str, target_lang: str, source_lang: str = "auto") -> str:
425
- """Translate text using Google Translate."""
426
  try:
427
- translator = Translator()
428
-
429
- # Language code mapping
430
  lang_mapping = {
431
  "Hindi": "hi",
432
  "English": "en",
@@ -448,20 +452,19 @@ def translate_text(text: str, target_lang: str, source_lang: str = "auto") -> st
448
  target_code = lang_mapping.get(target_lang, "en")
449
  source_code = lang_mapping.get(source_lang, "auto") if source_lang != "auto" else "auto"
450
 
451
- result = translator.translate(text, src=source_code, dest=target_code)
452
- return result.text
453
  except Exception as e:
454
- st.error(f"Translation error: {str(e)}")
455
  return text
456
 
457
  def detect_language(text: str) -> str:
458
- """Detect the language of given text."""
459
  try:
460
- translator = Translator()
461
- result = translator.detect(text)
462
-
463
- # Reverse mapping for display
464
- lang_mapping = {
465
  "hi": "Hindi",
466
  "en": "English",
467
  "bn": "Bengali",
@@ -479,7 +482,7 @@ def detect_language(text: str) -> str:
479
  "sa": "Sanskrit"
480
  }
481
 
482
- return lang_mapping.get(result.lang, "Unknown")
483
  except Exception as e:
484
  st.error(f"Language detection error: {str(e)}")
485
- return "Unknown"
 
7
  import re
8
  import bcrypt
9
  import yaml
10
+ # Changed from googletrans to deep_translator
11
+ from deep_translator import GoogleTranslator, MyMemoryTranslator # GoogleTranslator is more commonly used for general translation, MyMemoryTranslator can be a fallback
12
 
13
  # Data storage functions
14
  def load_entries() -> List[Dict]:
 
28
  try:
29
  entries = load_entries()
30
  entries.append(entry)
31
+
32
  os.makedirs("data_entries", exist_ok=True)
33
  with open("data_entries/entries.json", "w", encoding="utf-8") as f:
34
  json.dump(entries, f, indent=2, ensure_ascii=False)
 
69
  "Kannada",
70
  "Malayalam",
71
  "Oriya",
72
+ "Other" # 'Punjabi', 'Assamese', 'Nepali', 'Sanskrit' removed because of limited deep_translator support or infrequent use; they can be added back if needed
 
 
 
 
73
  ]
74
 
75
  # Text-to-Speech functionality
 
78
  try:
79
  from gtts import gTTS
80
  import tempfile
81
+ # pygame is for local playback, not typically needed in Streamlit Cloud/Spaces
82
+ # import pygame
83
+
84
  # Map language names to gTTS language codes
85
  lang_map = {
86
  "English": "en",
 
94
  "Kannada": "kn",
95
  "Malayalam": "ml",
96
  "Punjabi": "pa",
97
+ "Oriya": "or", # Added for completeness if gTTS supports
98
+ # "Assamese": "as", # gTTS might not support
99
+ # "Nepali": "ne", # gTTS might not support
100
+ "Sanskrit": "sa" # gTTS might have limited support
101
  }
102
+
103
  lang_code = lang_map.get(language, "en")
104
+
105
  # Generate TTS
106
  tts = gTTS(text=text, lang=lang_code, slow=False)
107
+
108
+ # Save to temporary file and play using Streamlit
109
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
110
  tts.save(tmp_file.name)
111
+
 
112
  with open(tmp_file.name, "rb") as audio_file:
113
  audio_bytes = audio_file.read()
114
  st.audio(audio_bytes, format="audio/mp3")
115
+
116
  # Clean up
117
  os.unlink(tmp_file.name)
118
+
119
  except ImportError:
120
+ st.error("Text-to-speech library not available. Please ensure 'gtts' is installed.")
121
  except Exception as e:
122
  st.error(f"Error in text-to-speech: {str(e)}")
123
 
 
127
  try:
128
  import speech_recognition as sr
129
 
130
+ # Map language names to speech recognition language codes (Google Web Speech API)
131
  lang_map = {
132
  "English": "en-IN",
133
  "Hindi": "hi-IN",
 
139
  "Urdu": "ur-IN",
140
  "Kannada": "kn-IN",
141
  "Malayalam": "ml-IN",
142
+ "Punjabi": "pa-IN",
143
+ "Oriya": "or-IN" # Assuming Indian dialect code exists
144
  }
145
+
146
  lang_code = lang_map.get(language, "en-IN")
147
+
148
  # Initialize recognizer
149
  r = sr.Recognizer()
150
+
151
+ # Use microphone as source (This will only work in a local environment with mic access)
152
+ # For deployment on Hugging Face Spaces, direct microphone access is typically not available
153
+ # You might need to consider a different STT approach for cloud deployment (e.g., pre-recorded audio upload, or a paid STT API)
154
+ st.warning("Microphone input for Speech-to-Text may not work in deployed environments like Hugging Face Spaces.")
155
  with sr.Microphone() as source:
156
  # Adjust for ambient noise
157
  r.adjust_for_ambient_noise(source)
158
+
159
  # Listen for audio
160
+ st.info("Listening... Speak now!")
161
  audio = r.listen(source, timeout=5, phrase_time_limit=10)
162
+
163
  # Recognize speech
164
  text = r.recognize_google(audio, language=lang_code)
165
  return text
166
+
167
  except ImportError:
168
+ st.error("Speech recognition library not available. Please ensure 'SpeechRecognition' is installed.")
169
  return None
170
  except sr.UnknownValueError:
171
+ st.error("Could not understand audio. Please try again or speak more clearly.")
172
  return None
173
  except sr.RequestError as e:
174
+ st.error(f"Speech recognition service error (check internet/API): {str(e)}")
175
  return None
176
  except Exception as e:
177
+ st.error(f"An unexpected error occurred in speech recognition: {str(e)}")
178
  return None
179
 
180
  # Geocoding functionality
181
  def geocode_location(location_name: str) -> Optional[tuple]:
182
+ """Get coordinates for a location name using Nominatim."""
183
  try:
184
  from geopy.geocoders import Nominatim
185
 
186
+ # Initialize geolocator with a user_agent
187
+ geolocator = Nominatim(user_agent="farming-wisdom-archive-app") # Changed user_agent
188
  location = geolocator.geocode(location_name)
189
 
190
  if location:
 
192
  return None
193
 
194
  except ImportError:
195
+ st.error("Geocoding library not available. Please ensure 'geopy' is installed.")
196
  return None
197
  except Exception as e:
198
  st.error(f"Error in geocoding: {str(e)}")
 
200
 
201
  # Search functionality
202
  def search_entries(entries: List[Dict], query: str, language: str = None,
203
+ category: str = None, has_media: bool = False,
204
+ has_location: bool = False) -> List[Dict]:
205
  """Search entries based on query and filters."""
206
  results = []
207
  query_lower = query.lower()
 
233
 
234
  # Export functionality
235
  def export_to_jsonl(entries: List[Dict], include_media: bool = True,
236
+ include_coordinates: bool = True) -> str:
237
  """Export entries to JSONL format."""
238
  lines = []
239
 
 
262
  return '\n'.join(lines)
263
 
264
  def export_to_csv(entries: List[Dict], include_media: bool = True,
265
+ include_coordinates: bool = True) -> str:
266
  """Export entries to CSV format."""
267
  data = []
268
 
 
426
  user_data["users"][username]["entries_submitted"] += 1
427
  save_user_data(user_data)
428
 
429
+ # Translation functions (using deep_translator)
430
  def translate_text(text: str, target_lang: str, source_lang: str = "auto") -> str:
431
+ """Translate text using deep_translator's GoogleTranslator."""
432
  try:
433
+ # Language code mapping (deep_translator uses standard ISO codes)
 
 
434
  lang_mapping = {
435
  "Hindi": "hi",
436
  "English": "en",
 
452
  target_code = lang_mapping.get(target_lang, "en")
453
  source_code = lang_mapping.get(source_lang, "auto") if source_lang != "auto" else "auto"
454
 
455
+ translated = GoogleTranslator(source=source_code, target=target_code).translate(text)
456
+ return translated
457
  except Exception as e:
458
+ st.error(f"Translation error: {str(e)}. Please check internet connection or try again.")
459
  return text
460
 
461
  def detect_language(text: str) -> str:
462
+ """Detect the language of given text using deep_translator's GoogleTranslator."""
463
  try:
464
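+ detected_code = GoogleTranslator(source="auto", target="en").detect(text) # target 'en' is the default; any valid language code works. NOTE(review): deep_translator documents detection via single_detection()/batch_detection() (API key required) - confirm GoogleTranslator actually exposes .detect(), otherwise this always falls through to "Unknown"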
465
+
466
+ # Reverse mapping for display (ensure this maps codes to names)
467
+ lang_mapping_reverse = {
 
468
  "hi": "Hindi",
469
  "en": "English",
470
  "bn": "Bengali",
 
482
  "sa": "Sanskrit"
483
  }
484
 
485
+ return lang_mapping_reverse.get(detected_code, "Unknown")
486
  except Exception as e:
487
  st.error(f"Language detection error: {str(e)}")
488
+ return "Unknown"