Update helpers.py
Browse files- helpers.py +58 -55
helpers.py
CHANGED
|
@@ -7,7 +7,8 @@ from typing import List, Dict, Optional
|
|
| 7 |
import re
|
| 8 |
import bcrypt
|
| 9 |
import yaml
|
| 10 |
-
from googletrans
|
|
|
|
| 11 |
|
| 12 |
# Data storage functions
|
| 13 |
def load_entries() -> List[Dict]:
|
|
@@ -27,7 +28,7 @@ def save_entry(entry: Dict) -> bool:
|
|
| 27 |
try:
|
| 28 |
entries = load_entries()
|
| 29 |
entries.append(entry)
|
| 30 |
-
|
| 31 |
os.makedirs("data_entries", exist_ok=True)
|
| 32 |
with open("data_entries/entries.json", "w", encoding="utf-8") as f:
|
| 33 |
json.dump(entries, f, indent=2, ensure_ascii=False)
|
|
@@ -68,11 +69,7 @@ def get_languages() -> List[str]:
|
|
| 68 |
"Kannada",
|
| 69 |
"Malayalam",
|
| 70 |
"Oriya",
|
| 71 |
-
"
|
| 72 |
-
"Assamese",
|
| 73 |
-
"Nepali",
|
| 74 |
-
"Sanskrit",
|
| 75 |
-
"Other"
|
| 76 |
]
|
| 77 |
|
| 78 |
# Text-to-Speech functionality
|
|
@@ -81,8 +78,9 @@ def text_to_speech(text: str, language: str = "en") -> None:
|
|
| 81 |
try:
|
| 82 |
from gtts import gTTS
|
| 83 |
import tempfile
|
| 84 |
-
|
| 85 |
-
|
|
|
|
| 86 |
# Map language names to gTTS language codes
|
| 87 |
lang_map = {
|
| 88 |
"English": "en",
|
|
@@ -96,28 +94,30 @@ def text_to_speech(text: str, language: str = "en") -> None:
|
|
| 96 |
"Kannada": "kn",
|
| 97 |
"Malayalam": "ml",
|
| 98 |
"Punjabi": "pa",
|
| 99 |
-
"
|
|
|
|
|
|
|
|
|
|
| 100 |
}
|
| 101 |
-
|
| 102 |
lang_code = lang_map.get(language, "en")
|
| 103 |
-
|
| 104 |
# Generate TTS
|
| 105 |
tts = gTTS(text=text, lang=lang_code, slow=False)
|
| 106 |
-
|
| 107 |
-
# Save to temporary file
|
| 108 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
| 109 |
tts.save(tmp_file.name)
|
| 110 |
-
|
| 111 |
-
# Play audio using Streamlit
|
| 112 |
with open(tmp_file.name, "rb") as audio_file:
|
| 113 |
audio_bytes = audio_file.read()
|
| 114 |
st.audio(audio_bytes, format="audio/mp3")
|
| 115 |
-
|
| 116 |
# Clean up
|
| 117 |
os.unlink(tmp_file.name)
|
| 118 |
-
|
| 119 |
except ImportError:
|
| 120 |
-
st.error("Text-to-speech library not available. Please
|
| 121 |
except Exception as e:
|
| 122 |
st.error(f"Error in text-to-speech: {str(e)}")
|
| 123 |
|
|
@@ -127,7 +127,7 @@ def speech_to_text(language: str = "en") -> Optional[str]:
|
|
| 127 |
try:
|
| 128 |
import speech_recognition as sr
|
| 129 |
|
| 130 |
-
# Map language names to speech recognition language codes
|
| 131 |
lang_map = {
|
| 132 |
"English": "en-IN",
|
| 133 |
"Hindi": "hi-IN",
|
|
@@ -139,46 +139,52 @@ def speech_to_text(language: str = "en") -> Optional[str]:
|
|
| 139 |
"Urdu": "ur-IN",
|
| 140 |
"Kannada": "kn-IN",
|
| 141 |
"Malayalam": "ml-IN",
|
| 142 |
-
"Punjabi": "pa-IN"
|
|
|
|
| 143 |
}
|
| 144 |
-
|
| 145 |
lang_code = lang_map.get(language, "en-IN")
|
| 146 |
-
|
| 147 |
# Initialize recognizer
|
| 148 |
r = sr.Recognizer()
|
| 149 |
-
|
| 150 |
-
# Use microphone as source
|
|
|
|
|
|
|
|
|
|
| 151 |
with sr.Microphone() as source:
|
| 152 |
# Adjust for ambient noise
|
| 153 |
r.adjust_for_ambient_noise(source)
|
| 154 |
-
|
| 155 |
# Listen for audio
|
|
|
|
| 156 |
audio = r.listen(source, timeout=5, phrase_time_limit=10)
|
| 157 |
-
|
| 158 |
# Recognize speech
|
| 159 |
text = r.recognize_google(audio, language=lang_code)
|
| 160 |
return text
|
| 161 |
-
|
| 162 |
except ImportError:
|
| 163 |
-
st.error("Speech recognition library not available. Please
|
| 164 |
return None
|
| 165 |
except sr.UnknownValueError:
|
| 166 |
-
st.error("Could not understand audio. Please try again.")
|
| 167 |
return None
|
| 168 |
except sr.RequestError as e:
|
| 169 |
-
st.error(f"Speech recognition service error: {str(e)}")
|
| 170 |
return None
|
| 171 |
except Exception as e:
|
| 172 |
-
st.error(f"
|
| 173 |
return None
|
| 174 |
|
| 175 |
# Geocoding functionality
|
| 176 |
def geocode_location(location_name: str) -> Optional[tuple]:
|
| 177 |
-
"""Get coordinates for a location name."""
|
| 178 |
try:
|
| 179 |
from geopy.geocoders import Nominatim
|
| 180 |
|
| 181 |
-
geolocator
|
|
|
|
| 182 |
location = geolocator.geocode(location_name)
|
| 183 |
|
| 184 |
if location:
|
|
@@ -186,7 +192,7 @@ def geocode_location(location_name: str) -> Optional[tuple]:
|
|
| 186 |
return None
|
| 187 |
|
| 188 |
except ImportError:
|
| 189 |
-
st.error("Geocoding library not available. Please
|
| 190 |
return None
|
| 191 |
except Exception as e:
|
| 192 |
st.error(f"Error in geocoding: {str(e)}")
|
|
@@ -194,8 +200,8 @@ def geocode_location(location_name: str) -> Optional[tuple]:
|
|
| 194 |
|
| 195 |
# Search functionality
|
| 196 |
def search_entries(entries: List[Dict], query: str, language: str = None,
|
| 197 |
-
|
| 198 |
-
|
| 199 |
"""Search entries based on query and filters."""
|
| 200 |
results = []
|
| 201 |
query_lower = query.lower()
|
|
@@ -227,7 +233,7 @@ def search_entries(entries: List[Dict], query: str, language: str = None,
|
|
| 227 |
|
| 228 |
# Export functionality
|
| 229 |
def export_to_jsonl(entries: List[Dict], include_media: bool = True,
|
| 230 |
-
|
| 231 |
"""Export entries to JSONL format."""
|
| 232 |
lines = []
|
| 233 |
|
|
@@ -256,7 +262,7 @@ def export_to_jsonl(entries: List[Dict], include_media: bool = True,
|
|
| 256 |
return '\n'.join(lines)
|
| 257 |
|
| 258 |
def export_to_csv(entries: List[Dict], include_media: bool = True,
|
| 259 |
-
|
| 260 |
"""Export entries to CSV format."""
|
| 261 |
data = []
|
| 262 |
|
|
@@ -420,13 +426,11 @@ def update_user_entry_count(username: str):
|
|
| 420 |
user_data["users"][username]["entries_submitted"] += 1
|
| 421 |
save_user_data(user_data)
|
| 422 |
|
| 423 |
-
# Translation functions
|
| 424 |
def translate_text(text: str, target_lang: str, source_lang: str = "auto") -> str:
|
| 425 |
-
"""Translate text using
|
| 426 |
try:
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
# Language code mapping
|
| 430 |
lang_mapping = {
|
| 431 |
"Hindi": "hi",
|
| 432 |
"English": "en",
|
|
@@ -448,20 +452,19 @@ def translate_text(text: str, target_lang: str, source_lang: str = "auto") -> st
|
|
| 448 |
target_code = lang_mapping.get(target_lang, "en")
|
| 449 |
source_code = lang_mapping.get(source_lang, "auto") if source_lang != "auto" else "auto"
|
| 450 |
|
| 451 |
-
|
| 452 |
-
return
|
| 453 |
except Exception as e:
|
| 454 |
-
st.error(f"Translation error: {str(e)}")
|
| 455 |
return text
|
| 456 |
|
| 457 |
def detect_language(text: str) -> str:
|
| 458 |
-
"""Detect the language of given text."""
|
| 459 |
try:
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
lang_mapping = {
|
| 465 |
"hi": "Hindi",
|
| 466 |
"en": "English",
|
| 467 |
"bn": "Bengali",
|
|
@@ -479,7 +482,7 @@ def detect_language(text: str) -> str:
|
|
| 479 |
"sa": "Sanskrit"
|
| 480 |
}
|
| 481 |
|
| 482 |
-
return
|
| 483 |
except Exception as e:
|
| 484 |
st.error(f"Language detection error: {str(e)}")
|
| 485 |
-
return "Unknown"
|
|
|
|
| 7 |
import re
|
| 8 |
import bcrypt
|
| 9 |
import yaml
|
| 10 |
+
# Changed from googletrans to deep_translator
|
| 11 |
+
from deep_translator import GoogleTranslator, MyMemoryTranslator  # GoogleTranslator handles general translation; MyMemoryTranslator is imported as a possible fallback
|
| 12 |
|
| 13 |
# Data storage functions
|
| 14 |
def load_entries() -> List[Dict]:
|
|
|
|
| 28 |
try:
|
| 29 |
entries = load_entries()
|
| 30 |
entries.append(entry)
|
| 31 |
+
|
| 32 |
os.makedirs("data_entries", exist_ok=True)
|
| 33 |
with open("data_entries/entries.json", "w", encoding="utf-8") as f:
|
| 34 |
json.dump(entries, f, indent=2, ensure_ascii=False)
|
|
|
|
| 69 |
"Kannada",
|
| 70 |
"Malayalam",
|
| 71 |
"Oriya",
|
| 72 |
+
"Other"  # 'Assamese', 'Nepali', and 'Sanskrit' were removed because of limited deep_translator support or because they are less commonly needed; add them back if required
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
]
|
| 74 |
|
| 75 |
# Text-to-Speech functionality
|
|
|
|
| 78 |
try:
|
| 79 |
from gtts import gTTS
|
| 80 |
import tempfile
|
| 81 |
+
# pygame is for local playback, not typically needed in Streamlit Cloud/Spaces
|
| 82 |
+
# import pygame
|
| 83 |
+
|
| 84 |
# Map language names to gTTS language codes
|
| 85 |
lang_map = {
|
| 86 |
"English": "en",
|
|
|
|
| 94 |
"Kannada": "kn",
|
| 95 |
"Malayalam": "ml",
|
| 96 |
"Punjabi": "pa",
|
| 97 |
+
"Oriya": "or", # Added for completeness if gTTS supports
|
| 98 |
+
# "Assamese": "as", # gTTS might not support
|
| 99 |
+
# "Nepali": "ne", # gTTS might not support
|
| 100 |
+
"Sanskrit": "sa" # gTTS might have limited support
|
| 101 |
}
|
| 102 |
+
|
| 103 |
lang_code = lang_map.get(language, "en")
|
| 104 |
+
|
| 105 |
# Generate TTS
|
| 106 |
tts = gTTS(text=text, lang=lang_code, slow=False)
|
| 107 |
+
|
| 108 |
+
# Save to temporary file and play using Streamlit
|
| 109 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
|
| 110 |
tts.save(tmp_file.name)
|
| 111 |
+
|
|
|
|
| 112 |
with open(tmp_file.name, "rb") as audio_file:
|
| 113 |
audio_bytes = audio_file.read()
|
| 114 |
st.audio(audio_bytes, format="audio/mp3")
|
| 115 |
+
|
| 116 |
# Clean up
|
| 117 |
os.unlink(tmp_file.name)
|
| 118 |
+
|
| 119 |
except ImportError:
|
| 120 |
+
st.error("Text-to-speech library not available. Please ensure 'gtts' is installed.")
|
| 121 |
except Exception as e:
|
| 122 |
st.error(f"Error in text-to-speech: {str(e)}")
|
| 123 |
|
|
|
|
| 127 |
try:
|
| 128 |
import speech_recognition as sr
|
| 129 |
|
| 130 |
+
# Map language names to speech recognition language codes (Google Web Speech API)
|
| 131 |
lang_map = {
|
| 132 |
"English": "en-IN",
|
| 133 |
"Hindi": "hi-IN",
|
|
|
|
| 139 |
"Urdu": "ur-IN",
|
| 140 |
"Kannada": "kn-IN",
|
| 141 |
"Malayalam": "ml-IN",
|
| 142 |
+
"Punjabi": "pa-IN",
|
| 143 |
+
"Oriya": "or-IN" # Assuming Indian dialect code exists
|
| 144 |
}
|
| 145 |
+
|
| 146 |
lang_code = lang_map.get(language, "en-IN")
|
| 147 |
+
|
| 148 |
# Initialize recognizer
|
| 149 |
r = sr.Recognizer()
|
| 150 |
+
|
| 151 |
+
# Use microphone as source (This will only work in a local environment with mic access)
|
| 152 |
+
# For deployment on Hugging Face Spaces, direct microphone access is typically not available
|
| 153 |
+
# You might need to consider a different STT approach for cloud deployment (e.g., pre-recorded audio upload, or a paid STT API)
|
| 154 |
+
st.warning("Microphone input for Speech-to-Text may not work in deployed environments like Hugging Face Spaces.")
|
| 155 |
with sr.Microphone() as source:
|
| 156 |
# Adjust for ambient noise
|
| 157 |
r.adjust_for_ambient_noise(source)
|
| 158 |
+
|
| 159 |
# Listen for audio
|
| 160 |
+
st.info("Listening... Speak now!")
|
| 161 |
audio = r.listen(source, timeout=5, phrase_time_limit=10)
|
| 162 |
+
|
| 163 |
# Recognize speech
|
| 164 |
text = r.recognize_google(audio, language=lang_code)
|
| 165 |
return text
|
| 166 |
+
|
| 167 |
except ImportError:
|
| 168 |
+
st.error("Speech recognition library not available. Please ensure 'SpeechRecognition' is installed.")
|
| 169 |
return None
|
| 170 |
except sr.UnknownValueError:
|
| 171 |
+
st.error("Could not understand audio. Please try again or speak more clearly.")
|
| 172 |
return None
|
| 173 |
except sr.RequestError as e:
|
| 174 |
+
st.error(f"Speech recognition service error (check internet/API): {str(e)}")
|
| 175 |
return None
|
| 176 |
except Exception as e:
|
| 177 |
+
st.error(f"An unexpected error occurred in speech recognition: {str(e)}")
|
| 178 |
return None
|
| 179 |
|
| 180 |
# Geocoding functionality
|
| 181 |
def geocode_location(location_name: str) -> Optional[tuple]:
|
| 182 |
+
"""Get coordinates for a location name using Nominatim."""
|
| 183 |
try:
|
| 184 |
from geopy.geocoders import Nominatim
|
| 185 |
|
| 186 |
+
# Initialize geolocator with a user_agent
|
| 187 |
+
geolocator = Nominatim(user_agent="farming-wisdom-archive-app") # Changed user_agent
|
| 188 |
location = geolocator.geocode(location_name)
|
| 189 |
|
| 190 |
if location:
|
|
|
|
| 192 |
return None
|
| 193 |
|
| 194 |
except ImportError:
|
| 195 |
+
st.error("Geocoding library not available. Please ensure 'geopy' is installed.")
|
| 196 |
return None
|
| 197 |
except Exception as e:
|
| 198 |
st.error(f"Error in geocoding: {str(e)}")
|
|
|
|
| 200 |
|
| 201 |
# Search functionality
|
| 202 |
def search_entries(entries: List[Dict], query: str, language: str = None,
|
| 203 |
+
category: str = None, has_media: bool = False,
|
| 204 |
+
has_location: bool = False) -> List[Dict]:
|
| 205 |
"""Search entries based on query and filters."""
|
| 206 |
results = []
|
| 207 |
query_lower = query.lower()
|
|
|
|
| 233 |
|
| 234 |
# Export functionality
|
| 235 |
def export_to_jsonl(entries: List[Dict], include_media: bool = True,
|
| 236 |
+
include_coordinates: bool = True) -> str:
|
| 237 |
"""Export entries to JSONL format."""
|
| 238 |
lines = []
|
| 239 |
|
|
|
|
| 262 |
return '\n'.join(lines)
|
| 263 |
|
| 264 |
def export_to_csv(entries: List[Dict], include_media: bool = True,
|
| 265 |
+
include_coordinates: bool = True) -> str:
|
| 266 |
"""Export entries to CSV format."""
|
| 267 |
data = []
|
| 268 |
|
|
|
|
| 426 |
user_data["users"][username]["entries_submitted"] += 1
|
| 427 |
save_user_data(user_data)
|
| 428 |
|
| 429 |
+
# Translation functions (using deep_translator)
|
| 430 |
def translate_text(text: str, target_lang: str, source_lang: str = "auto") -> str:
|
| 431 |
+
"""Translate text using deep_translator's GoogleTranslator."""
|
| 432 |
try:
|
| 433 |
+
# Language code mapping (deep_translator uses standard ISO codes)
|
|
|
|
|
|
|
| 434 |
lang_mapping = {
|
| 435 |
"Hindi": "hi",
|
| 436 |
"English": "en",
|
|
|
|
| 452 |
target_code = lang_mapping.get(target_lang, "en")
|
| 453 |
source_code = lang_mapping.get(source_lang, "auto") if source_lang != "auto" else "auto"
|
| 454 |
|
| 455 |
+
translated = GoogleTranslator(source=source_code, target=target_code).translate(text)
|
| 456 |
+
return translated
|
| 457 |
except Exception as e:
|
| 458 |
+
st.error(f"Translation error: {str(e)}. Please check internet connection or try again.")
|
| 459 |
return text
|
| 460 |
|
| 461 |
def detect_language(text: str) -> str:
|
| 462 |
+
"""Detect the language of given text using deep_translator's GoogleTranslator."""
|
| 463 |
try:
|
| 464 |
+
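detected_code = GoogleTranslator(source="auto", target="en").detect(text)  # target defaults to 'en'; any valid language code works. NOTE(review): deep_translator documents detection via single_detection()/batch_detection() (API key required) - confirm GoogleTranslator exposes detect(); if not, this call raises and the except branch below returns "Unknown" every time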
|
| 465 |
+
|
| 466 |
+
# Reverse mapping for display (ensure this maps codes to names)
|
| 467 |
+
lang_mapping_reverse = {
|
|
|
|
| 468 |
"hi": "Hindi",
|
| 469 |
"en": "English",
|
| 470 |
"bn": "Bengali",
|
|
|
|
| 482 |
"sa": "Sanskrit"
|
| 483 |
}
|
| 484 |
|
| 485 |
+
return lang_mapping_reverse.get(detected_code, "Unknown")
|
| 486 |
except Exception as e:
|
| 487 |
st.error(f"Language detection error: {str(e)}")
|
| 488 |
+
return "Unknown"
|