omdeep22
/

Gonyai-teo2

@@ -370,4 +370,34 @@ class KonkanGPT(PreTrainedModel):
             if marker and marker in response:
                 response = response.split(marker)[0].strip()
         return response

             if marker and marker in response:
                 response = response.split(marker)[0].strip()
+        # Post-processing: remove English words/phrases in brackets
+        # e.g. "निसर्गसंपदा (Mobily)" → "निसर्गसंपदा"
+        # e.g. "शार (City of Goa)" → "शार"
+        # Keeps Devanagari/numbers/Konkani punctuation in brackets intact
+        import re
+        def _is_english_content(text):
+            """Report whether Latin letters exceed 40% of the characters in ``text``.
+
+            A character counts as a Latin letter when its lowercase form
+            compares within 'a'..'z'. Every character of ``text`` counts
+            towards the total, so an empty string yields False.
+            """
+            # Fold case first so one a-z range comparison covers both cases.
+            latin_chars = [ch for ch in text if 'a' <= ch.lower() <= 'z']
+            return len(latin_chars) > len(text) * 0.4
+        def _clean_brackets(text):
+            """Remove bracketed English glosses from ``text`` and tidy spacing.
+
+            A ``(...)`` or ``[...]`` group, together with any whitespace
+            directly before it, is deleted when ``_is_english_content``
+            classifies the matched span as English; every other group is
+            kept exactly as written. Afterwards runs of spaces are collapsed,
+            a space before ``, । . ! ?`` is removed (the punctuation mark
+            itself is kept), and the result is stripped.
+            """
+            def _drop_if_english(match):
+                # NOTE(review): the classifier receives the whole match, i.e.
+                # the leading whitespace and the brackets as well as the inner
+                # text, so those characters count towards its length ratio.
+                span = match.group()
+                return '' if _is_english_content(span) else span
+            # Round brackets first, then square brackets; same rule for both.
+            text = re.sub(r'\s*\([^)]*\)', _drop_if_english, text)
+            text = re.sub(r'\s*\[[^\]]*\]', _drop_if_english, text)
+            # Collapse runs of spaces left where a group was removed.
+            text = re.sub(r' {2,}', ' ', text)
+            # Re-emit the captured mark via \1 so that only the stray space in
+            # front of it is dropped; an empty replacement would delete the
+            # punctuation too.
+            text = re.sub(r' ([,।.!?])', r'\1', text)
+            return text.strip()
+        response = _clean_brackets(response)
         return response