Spaces:

NextDrought
/

worship

Sleeping

Peter Yang committed on Nov 13, 2025

Commit

d3816fa

1 Parent(s): 9124732

Improve translation completeness: better paragraph splitting for headings, improved prompts, lower min length for titles

Files changed (2) hide show

app.py CHANGED Viewed

@@ -71,6 +71,7 @@ async def translate_document(docx_path: str, output_path: str = None):
         return None
     # Split content into paragraphs and find Chinese paragraphs
     paragraphs = content.split('\n\n')
     chinese_paragraphs = []
@@ -79,6 +80,23 @@ async def translate_document(docx_path: str, output_path: str = None):
         if para:
             chinese_chars = re.findall(r'[\u4e00-\u9fff]+', para)
             if chinese_chars:
                 chinese_paragraphs.append(para)
     # Translate each paragraph

         return None
     # Split content into paragraphs and find Chinese paragraphs
+    # First split by double newlines
     paragraphs = content.split('\n\n')
     chinese_paragraphs = []
         if para:
             chinese_chars = re.findall(r'[\u4e00-\u9fff]+', para)
             if chinese_chars:
+                # Check if paragraph contains a heading followed by content (e.g., "标题：内容")
+                # Split headings from content to ensure both are translated
+                if '：' in para or ':' in para:
+                    # Check if it's a heading pattern (short line ending with colon)
+                    lines = para.split('\n')
+                    if len(lines) > 1:
+                        first_line = lines[0].strip()
+                        # If first line is short and ends with colon, treat as heading
+                        if len(first_line) < 30 and (first_line.endswith('：') or first_line.endswith(':')):
+                            # Add heading as separate paragraph
+                            chinese_paragraphs.append(first_line)
+                            # Add remaining content as separate paragraph
+                            remaining = '\n'.join(lines[1:]).strip()
+                            if remaining:
+                                chinese_paragraphs.append(remaining)
+                            continue
                 chinese_paragraphs.append(para)
     # Translate each paragraph

document_processing_agent.py CHANGED Viewed

@@ -648,7 +648,11 @@ English:"""
                 elif translation.startswith("'") and translation.endswith("'"):
                     translation = translation[1:-1].strip()
-            return translation if translation and len(translation) > 5 else None
         except Exception as e:
             print(f"Qwen2.5 translation error: {e}")

                 elif translation.startswith("'") and translation.endswith("'"):
                     translation = translation[1:-1].strip()
+            # Short inputs (under 10 characters) are likely titles (e.g., "As Children of Light"), whose
+            # translations may be brief, so accept 3+ characters for them; other inputs need 5+.
+            min_length = 3 if len(text) < 10 else 5  # Lower threshold for short inputs (likely titles)
+            return translation if translation and len(translation) >= min_length else None
         except Exception as e:
             print(f"Qwen2.5 translation error: {e}")