File size: 19,199 Bytes
849e58a
ad6a882
 
 
 
 
 
 
 
 
 
 
 
 
 
849e58a
 
ad6a882
 
 
 
 
 
 
 
 
 
849e58a
 
 
 
 
ad6a882
8bacc70
 
 
 
 
 
 
 
 
 
 
ad6a882
 
 
849e58a
 
ad6a882
849e58a
 
 
 
 
 
ad6a882
849e58a
ad6a882
849e58a
ad6a882
 
 
 
 
 
 
849e58a
ad6a882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849e58a
ad6a882
 
 
 
 
 
849e58a
 
ad6a882
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849e58a
ad6a882
 
849e58a
 
ad6a882
 
 
849e58a
 
ad6a882
 
849e58a
 
ad6a882
 
 
 
849e58a
 
ad6a882
849e58a
 
ad6a882
849e58a
 
ad6a882
 
 
 
 
 
 
 
 
849e58a
ad6a882
 
 
 
 
849e58a
7fe71fb
 
 
 
 
8bacc70
 
ad6a882
849e58a
ad6a882
849e58a
 
ad6a882
 
 
849e58a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad6a882
 
 
 
849e58a
 
 
 
ad6a882
849e58a
 
 
ad6a882
 
849e58a
 
 
 
ad6a882
 
 
 
 
 
 
 
 
 
 
 
849e58a
 
ad6a882
 
 
 
 
 
 
 
 
849e58a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad6a882
 
 
 
 
 
 
849e58a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad6a882
 
 
849e58a
ad6a882
849e58a
ad6a882
 
849e58a
 
 
ad6a882
 
 
 
 
849e58a
ad6a882
849e58a
 
ad6a882
849e58a
 
ad6a882
 
849e58a
 
ad6a882
849e58a
 
ad6a882
 
849e58a
ad6a882
849e58a
ad6a882
 
 
 
 
849e58a
 
 
 
 
ad6a882
849e58a
ad6a882
849e58a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad6a882
849e58a
ad6a882
 
 
 
 
 
 
 
 
849e58a
ad6a882
 
849e58a
 
 
 
 
 
ad6a882
 
849e58a
 
ad6a882
849e58a
ad6a882
 
 
849e58a
 
ad6a882
849e58a
ad6a882
849e58a
 
ad6a882
849e58a
 
 
9ee21c9
 
 
 
 
 
 
7ff4b34
9ee21c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# --- Import necessary libraries ---
import streamlit as st
import os
import sys
import time
import io # Needed for handling file streams in memory
from pathlib import Path

try:
    import google.generativeai as genai
    from google.api_core import exceptions as google_exceptions
except ImportError:
    st.error("Error: google-generativeai library not found. Please install it: `pip install google-generativeai`")
    st.stop()

# --- Configuration ---
# The API key is not stored in this file: it is read from Streamlit secrets
# under the name GOOGLE_API_KEY when the Gemini client is configured.
MODEL_NAME = "gemini-1.5-pro"  # Or "gemini-1.5-flash-latest" etc.

# Every harm category below is filtered at the same threshold.
_BLOCK_THRESHOLD = "BLOCK_MEDIUM_AND_ABOVE"
_HARM_CATEGORIES = (
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
)
SAFETY_SETTINGS = [
    {"category": category, "threshold": _BLOCK_THRESHOLD}
    for category in _HARM_CATEGORIES
]

API_CALL_DELAY = 0.5  # Optional delay in seconds between API calls

# Languages offered in the source/target selectors.
LANGUAGES = [
    "russian",
    "romanian",
    "english",
    "german",
    "french",
    "spanish",
]

# --- Default File Configuration ---
# Bundled sample document, looked up next to this script.
_APP_DIR = Path(__file__).parent
DEFAULT_TXT_PATH = _APP_DIR / "default_pharma.txt"  # <--- CHANGE FILENAME IF NEEDED
DEFAULT_TXT_LANGUAGE = "russian"  # <--- CHANGE LANGUAGE IF NEEDED

# --- Core Functions ---

def load_css(file_name):
    """Inject a CSS file located next to this script into the Streamlit page.

    Args:
        file_name: Name of the stylesheet, relative to this script's directory.

    Returns:
        None. Failures are reported through ``st.error`` instead of raising.
    """
    try:
        css_path = Path(__file__).parent / file_name
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        css = css_path.read_text(encoding="utf-8")
        st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
    except FileNotFoundError:
        st.error(f"CSS file not found: {file_name}. Make sure it's in the same directory as app.py.")
    except Exception as e:
        st.error(f"Error loading CSS file {file_name}: {e}")

# Global variable to hold the configured model
gemini_model = None

def configure_gemini():
    """Configure the Gemini client from the ``GOOGLE_API_KEY`` Streamlit secret.

    On success the module-level ``gemini_model`` holds a ``GenerativeModel``
    built with ``MODEL_NAME`` and ``SAFETY_SETTINGS``; on any failure it is
    reset to ``None`` and the problem is reported in the UI.

    Returns:
        True if the model was configured, False otherwise.
    """
    global gemini_model
    gemini_model = None  # Stays None unless configuration fully succeeds

    try:
        api_key = st.secrets.get("GOOGLE_API_KEY")
    except Exception:
        # Reading st.secrets raises when no secrets file is available at all;
        # treat that the same as a missing key instead of crashing the app.
        api_key = None

    if not api_key:
        st.error("Error: GOOGLE_API_KEY not found in Streamlit secrets.")
        st.info("Please add your Gemini API Key to the Streamlit secrets manager.")
        return False

    try:
        genai.configure(api_key=api_key)
        gemini_model = genai.GenerativeModel(MODEL_NAME, safety_settings=SAFETY_SETTINGS)
        return True
    except Exception as e:
        st.error(f"Error configuring Gemini: {e}")
        gemini_model = None # Ensure model is None if config fails
        return False

def extract_text_from_txt(txt_file_obj):
    """Read and decode the text content of a binary file-like object.

    The bytes are decoded as UTF-8 (a leading byte-order mark, if present, is
    dropped). If that fails, Latin-1 is used as a fallback and a warning is
    shown, because Latin-1 accepts every byte sequence and may therefore
    misinterpret files written in another encoding.

    Args:
        txt_file_obj: Object whose ``read()`` returns bytes (e.g. ``io.BytesIO``
            or a file opened in binary mode).

    Returns:
        The decoded text, or None if reading/decoding failed (the error is
        reported through ``st.error``).
    """
    try:
        content_bytes = txt_file_obj.read()
        try:
            # 'utf-8-sig' behaves like 'utf-8' but strips a leading BOM so it
            # does not end up inside the text sent for translation.
            text = content_bytes.decode('utf-8-sig')
        except UnicodeDecodeError:
            # Latin-1 maps every possible byte, so this decode cannot fail.
            text = content_bytes.decode('latin-1')
            st.warning("Decoded TXT file using 'latin-1'. Some characters might be misinterpreted if the encoding is different.")
        st.info("Successfully read text file.")
        return text
    except Exception as e:
        st.error(f"An error occurred reading the TXT file: {e}")
        return None

def translate_text_gemini(text, source_lang, target_lang, log_prefix="Text block"):
    """Translate ``text`` with the module-level Gemini model.

    Args:
        text: Text to translate.
        source_lang: Name of the language ``text`` is written in.
        target_lang: Name of the language to translate into.
        log_prefix: Label used in UI messages and in failure markers.

    Returns:
        - None if the Gemini model has not been configured.
        - "" if ``text`` is empty/whitespace, or the model finished normally
          (finish reason STOP) without producing any content.
        - The translated text on success.
        - A marker string starting with "[Translation" when the request was
          blocked or failed; callers detect failures by that prefix.
    """
    if gemini_model is None:
        st.error("Gemini model not configured. Cannot translate.")
        return None # Indicate failure

    if not text or not text.strip():
        st.warning(f"{log_prefix}: Input text is empty or whitespace only. Skipping translation.")
        return "" # Nothing to translate

    prompt = f"""Translate the following text from {source_lang} to {target_lang}.
Preserve paragraph breaks where appropriate. Output *only* the translated text, without any introductory phrases like "Here is the translation:", or any explanations or markdown formatting. If the input text is empty or nonsensical for translation, output nothing.

Text to translate:
---
{text}
---

Translation:"""

    try:
        # Optional: Add delay between calls
        if API_CALL_DELAY > 0:
            time.sleep(API_CALL_DELAY)

        response = gemini_model.generate_content(prompt)

        # Read the parts straight from the first candidate. The `response.parts`
        # and `response.text` quick accessors raise ValueError when the response
        # carries no usable candidate (e.g. a blocked prompt), which would skip
        # the block-reason handling below and surface as a generic failure.
        candidates = response.candidates or []
        parts = candidates[0].content.parts if candidates else []
        translated_text = "".join(part.text for part in parts).strip()

        if translated_text:
            return translated_text

        # No content: work out whether the prompt was blocked, the model simply
        # had nothing to say, or something else went wrong.
        feedback = response.prompt_feedback
        if feedback and feedback.block_reason:
            st.warning(f"{log_prefix}: Translation blocked. Reason: {feedback.block_reason}")
            return f"[Translation blocked on {log_prefix}: {feedback.block_reason}]"

        finish_reason = candidates[0].finish_reason if candidates else 'UNKNOWN'
        # finish_reason is an enum member, so compare by its name; comparing the
        # member itself with the string 'STOP' is never true.
        finish_name = getattr(finish_reason, "name", str(finish_reason))
        if finish_name == 'STOP':
            st.warning(f"{log_prefix}: Received no translated content (finish reason STOP). Original text might have been empty or untranslatable.")
            return "" # Return empty if no content and no blocking

        st.warning(f"{log_prefix}: Received empty response from API. Finish Reason: {finish_name}, Feedback: {feedback}")
        return f"[Translation failed on {log_prefix}: Empty API response]"

    except google_exceptions.ResourceExhausted as e:
         st.error(f"{log_prefix}: Error: Gemini API quota exceeded: {e}. Consider increasing API_CALL_DELAY or checking your quota.")
         return f"[Translation failed on {log_prefix}: Quota Exceeded - {e}]"
    except google_exceptions.InvalidArgument as e:
         st.error(f"{log_prefix}: Error: Invalid argument passed to Gemini API: {e}")
         return f"[Translation failed on {log_prefix}: Invalid Argument - {e}]"
    except Exception as e:
        st.error(f"{log_prefix}: Error during Gemini API call: {e}")
        return f"[Translation failed on {log_prefix}: {e}]"

def create_txt_from_text(translated_text):
    """Wrap translated text in an in-memory UTF-8 byte stream for download.

    Args:
        translated_text: The text to place in the output file.

    Returns:
        An ``io.BytesIO`` positioned at the start and holding the UTF-8 encoded
        text, or None if the text could not be encoded (the error is reported
        through ``st.error``).
    """
    try:
        # Encode directly; an intermediate StringIO copy adds nothing.
        txt_bytes_buffer = io.BytesIO(translated_text.encode('utf-8'))
        st.info("Translated TXT file content prepared.")
        return txt_bytes_buffer
    except Exception as e:
        st.error(f"Error creating output TXT: {e}")
        return None

# --- Load CSS and Fonts ---
# Pull in the web fonts first, then the local stylesheet.
st.markdown("""
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter+Tight:ital,wght@0,100..900;1,100..900&family=Space+Grotesk:wght@300..700&display=swap" rel="stylesheet">
""", unsafe_allow_html=True)
load_css("style.css")

# --- Streamlit App UI ---
# The title emoji was stored as mis-decoded UTF-8 bytes; restored to the
# intended "page" character.
st.title("📄 TXT Document Translator")

# Configure Gemini (attempt on each run, handles secret check)
gemini_configured = configure_gemini()

# --- File Input Options ---
# NOTE(review): the heading emoji was mis-encoded with one byte lost; a folder
# icon is the most plausible original -- confirm.
st.sidebar.subheader("📁 Input File")

# The default-file option is only offered when its configured language is one
# of the languages the app supports.
if DEFAULT_TXT_LANGUAGE not in LANGUAGES:
    st.sidebar.error(f"Configuration Error: Default language '{DEFAULT_TXT_LANGUAGE}' is not in the available LANGUAGES list.")
    use_default_disabled = True
    default_label = "Default TXT option disabled (invalid language)"
else:
    use_default_disabled = False
    default_label = f"Use default {DEFAULT_TXT_LANGUAGE.capitalize()} TXT file ({DEFAULT_TXT_PATH.name})"

use_default = st.sidebar.checkbox(
    default_label,
    value=False,
    key="use_default_cb",
    disabled=use_default_disabled
)

# Both stay None until a usable input (default file or upload + language) exists.
uploaded_file = None
source_lang_selected = None

if use_default and not use_default_disabled:
    if not DEFAULT_TXT_PATH.exists():
        st.sidebar.error(f"Default TXT file '{DEFAULT_TXT_PATH.name}' not found in the app directory!")
        # Keep source_lang_selected as None to prevent translation attempt
    else:
        st.sidebar.info(f"Using default file: `{DEFAULT_TXT_PATH.name}`")
        source_lang_selected = DEFAULT_TXT_LANGUAGE # Set source language automatically
        st.sidebar.markdown(f"*(Source Language: **{source_lang_selected.capitalize()}**)*")
else:
    uploaded_file = st.sidebar.file_uploader(
        "Or, upload your TXT file",
        type=["txt"],
        accept_multiple_files=False,
        key="file_uploader_txt"
    )
    if uploaded_file:
        # Dropdown for source language ONLY if uploading
        st.sidebar.markdown("👇 Select the **source** language of your uploaded file:")
        source_lang_selected = st.sidebar.selectbox(
            "Source Language",
            options=[""] + LANGUAGES, # Add empty option for prompt
            index=0, # Default to empty
            key="source_lang_uploader"
        )
        if not source_lang_selected:
             st.sidebar.warning("Please select the source language of your document.")
    elif not use_default:
        st.sidebar.info("Select the default option or upload a TXT file.")


st.sidebar.markdown("---") # Separator

# --- Target Language Selection ---
st.sidebar.subheader("🎯 Target Language")
target_lang_selected = None
# Ensure a source is defined before showing target selection
if source_lang_selected:
    # Filter out the selected source language for the target options
    available_target_langs = [lang for lang in LANGUAGES if lang != source_lang_selected]
    if available_target_langs:
        target_lang_selected = st.sidebar.selectbox(
            "Translate To",
            options=[""] + available_target_langs, # Exclude source lang
            index=0, # Default to empty
            key="target_lang",
            help="Select the language you want to translate the document into."
        )
        if not target_lang_selected:
            st.sidebar.warning("Please select the target language.")
    else:
        st.sidebar.warning("No other languages available for translation target.")

# No source language yet: explain the most specific reason why.
elif use_default and not DEFAULT_TXT_PATH.exists():
     st.sidebar.info("Cannot select target language: Default file missing.")
elif uploaded_file:
     # Checked before the configuration-error case: once a file is uploaded the
     # actionable next step is choosing its language, even if the default-file
     # option is misconfigured.
     st.sidebar.info("Select the source language first.")
elif use_default_disabled:
     st.sidebar.info("Cannot select target language: Default file configuration error.")
else:
    st.sidebar.info("Select or upload a file and its source language first.")


st.sidebar.markdown("---") # Separator

# --- Translate Button ---
# An input is usable either as the existing default file with its language,
# or as an uploaded file whose source language has been chosen.
default_input_ready = use_default and source_lang_selected and DEFAULT_TXT_PATH.exists()
upload_input_ready = uploaded_file and source_lang_selected
input_ready = default_input_ready or upload_input_ready

# Translation additionally needs a configured client and a target language.
can_translate = gemini_configured and input_ready and target_lang_selected

translate_button = st.sidebar.button("Translate Document", disabled=not can_translate)

# Tell the user what is still missing while the button is disabled.
if not gemini_configured:
    st.sidebar.error("Translation disabled: Gemini not configured (check API key in secrets).")
elif not input_ready:
    disabled_hint = None
    if use_default:
        if not DEFAULT_TXT_PATH.exists():
            disabled_hint = "_(Cannot translate: Default file is missing)_"
        elif not source_lang_selected:
            # Not expected when the default language is valid; kept as a safeguard.
            disabled_hint = "_(Cannot translate: Default file language error)_"
    elif not uploaded_file:
        disabled_hint = "_(Upload a TXT file or select default to enable translation)_"
    elif not source_lang_selected:
        disabled_hint = "_(Select source language to enable translation)_"
    if disabled_hint is not None:
        st.sidebar.markdown(disabled_hint)
elif not target_lang_selected:
    st.sidebar.markdown("_(Select target language to enable translation)_")


# --- Main Area for Processing and Results ---
# Runs the read -> translate -> package pipeline when the button is pressed;
# otherwise shows usage instructions or a configuration hint.
if translate_button and can_translate: # Double check conditions
    st.subheader("🚀 Translation Progress")
    output_buffer = None
    output_filename = "translation_failed.txt" # Default filename
    input_data = None
    input_filename_for_output = "default" # Default for output name generation

    with st.spinner("Processing... Please wait."):
        # 1. Get Input Data
        if use_default:
            try:
                with open(DEFAULT_TXT_PATH, "rb") as f:
                    input_data = io.BytesIO(f.read())
                input_filename_for_output = DEFAULT_TXT_PATH.name
                st.write(f"Processing default file: {input_filename_for_output} (TXT)")
            except Exception as e:
                st.error(f"Error reading default TXT file '{DEFAULT_TXT_PATH.name}': {e}")
                st.stop() # Stop processing if default file cannot be read
        elif uploaded_file:
            input_data = io.BytesIO(uploaded_file.getvalue()) # Use BytesIO for consistency
            input_filename_for_output = uploaded_file.name
            st.write(f"Processing uploaded file: {input_filename_for_output} (TXT)")
        else:
            # This case should ideally be prevented by button logic, but as a safeguard:
            st.error("No input file specified!")
            st.stop()

        # Basic validation passed in UI, but double-check core requirements
        if not input_data or not source_lang_selected or not target_lang_selected:
             st.error("Internal Error: Missing required input (file, source language, or target language) despite button being enabled.")
             st.stop()
        if source_lang_selected == target_lang_selected:
             st.error("Source and Target languages cannot be the same.")
             st.stop()

        # --- Start TXT Processing ---
        st.markdown("---")
        st.write("**Step 1: Reading Text...**")
        # Pass the BytesIO object directly to the function
        original_text = extract_text_from_txt(input_data)

        if original_text is not None:
            st.markdown("---")
            st.write(f"**Step 2: Translating text from {source_lang_selected} to {target_lang_selected}...**")
            status_text_txt = st.empty()
            status_text_txt.text("Sending text to translation API...")
            # Create a meaningful log prefix
            log_prefix = f"TXT ({Path(input_filename_for_output).name})"
            translated_text = translate_text_gemini(original_text, source_lang_selected, target_lang_selected, log_prefix=log_prefix)
            status_text_txt.text("Translation received.")

            # translate_text_gemini signals failures with a "[Translation..." marker string.
            if translated_text is not None and not translated_text.startswith("[Translation"): # Check success
                st.markdown("---")
                st.write("**Step 3: Creating Translated TXT file...**")
                output_buffer = create_txt_from_text(translated_text)
                if output_buffer:
                    # Generate filename based on original (default or uploaded)
                    output_filename = f"{Path(input_filename_for_output).stem}_translated_{target_lang_selected}.txt"
                    st.success("✅ Translation and TXT creation successful!")
                # else: Error handled in create_txt_from_text
            elif translated_text is not None and translated_text.startswith("[Translation"):
                # Display specific error from API call if it was returned
                st.error(f"Translation step failed. Reason: {translated_text}")
            else: # translated_text is None or some other issue
                st.error("Translation failed. Cannot create TXT file.")
        else:
            st.error("Reading/Decoding TXT file failed. Cannot proceed.")

    # --- Offer Download ---
    if output_buffer:
        st.markdown("---")
        st.subheader("📥 Download Result")
        st.download_button(
            label=f"Download {output_filename}",
            data=output_buffer,
            file_name=output_filename,
            mime="text/plain", # Correct mime type for TXT
        )
        # Display a snippet of the translation (optional)
        try:
            preview_limit = 1000 # Maximum number of characters shown in the preview
            # Decode first and slice by characters: slicing the raw bytes can cut
            # a multi-byte character in half, and a byte count cannot be compared
            # with a character count to decide whether the text was truncated.
            full_text = output_buffer.getvalue().decode('utf-8', errors='ignore')
            snippet = full_text[:preview_limit]
            st.text_area("Translation Snippet:", snippet + ("..." if len(full_text) > preview_limit else ""), height=200)
        except Exception as e:
            st.warning(f"Could not display snippet: {e}")

# --- Initial Instructions ---
# Show instructions if Gemini is configured but no action has been taken yet
elif gemini_configured and not input_ready:
    st.markdown(
        f"""
        ## How to Use:

        1.  **Choose Input:**
            *   Check the box in the sidebar to use the **default {DEFAULT_TXT_LANGUAGE.capitalize()} TXT file** (`{DEFAULT_TXT_PATH.name}`).
            *   Or, **upload** your own TXT file using the uploader below the checkbox.
        2.  **Select Languages:**
            *   If uploading, select the **source language** of your file. (The source language is set automatically for the default file).
            *   Select the **target language** you want to translate to.
        3.  **Translate:** Click the "Translate Document" button in the sidebar (it will be enabled once steps 1 & 2 are complete).
        4.  **Download:** Once processed, a download button for the translated TXT file will appear, along with a preview snippet.
        """
    )
elif not gemini_configured:
    # Optional: Add a message here if Gemini isn't configured, though the error message is already shown in the sidebar.
    st.info("Please configure the Gemini API Key in Streamlit secrets to enable translation.")
    
import streamlit.components.v1 as components
# Embedded script that measures the first '.stMain' element of the parent
# document and posts its scroll height as a 'setHeight' message to the window
# two levels up (presumably a host page embedding this app -- confirm).
components.html(
    """
    <script>
      function sendHeightWhenReady() {
        const el = window.parent.document.getElementsByClassName('stMain')[0];
        if (el) {
          const height = el.scrollHeight;
          window.parent.parent.postMessage({ type: 'setHeight', height: height }, '*');
        }
        // If the element is not there yet, the 1s interval below tries again;
        // scheduling an extra retry here would stack up timers on every poll.
      }

      window.onload = sendHeightWhenReady;
      window.addEventListener('resize', sendHeightWhenReady);
      setInterval(sendHeightWhenReady, 1000);
    </script>
    """
)