Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -737,7 +737,6 @@ def process_segment_with_gpt(segment, source_lang, target_lang, model="gpt-4", o
|
|
| 737 |
"""
|
| 738 |
Processes a single text segment: restores punctuation and translates using an OpenAI GPT model.
|
| 739 |
"""
|
| 740 |
-
# Essential check: Ensure the OpenAI client is provided
|
| 741 |
if openai_client is None:
|
| 742 |
segment_identifier = f"{segment.get('start', 'N/A')}-{segment.get('end', 'N/A')}"
|
| 743 |
logger.error(f"❌ OpenAI client was not provided for segment {segment_identifier}. Cannot process.")
|
|
@@ -767,31 +766,46 @@ def process_segment_with_gpt(segment, source_lang, target_lang, model="gpt-4", o
|
|
| 767 |
|
| 768 |
try:
|
| 769 |
logger.debug(f"Sending request to OpenAI model '{model}' for segment {segment_id}...")
|
| 770 |
-
response = openai_client.chat.completions.create(
|
| 771 |
model=model,
|
| 772 |
messages=[{"role": "user", "content": prompt}],
|
| 773 |
temperature=0.3
|
| 774 |
)
|
| 775 |
content = response.choices[0].message.content.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 776 |
logger.debug(
|
| 777 |
-
f"
|
| 778 |
-
f"'{
|
| 779 |
)
|
| 780 |
|
| 781 |
result_json = {}
|
| 782 |
try:
|
| 783 |
-
|
| 784 |
-
result_json = json.loads(content)
|
| 785 |
except json.JSONDecodeError as e:
|
| 786 |
logger.warning(
|
| 787 |
f"⚠️ Failed to parse JSON response for segment {segment_id}. Error: {e}. "
|
| 788 |
-
f"
|
| 789 |
)
|
| 790 |
-
# Fallback behavior if JSON parsing fails: use original text, empty translation
|
| 791 |
punctuated_text = original_text
|
| 792 |
-
translated_text = ""
|
| 793 |
else:
|
| 794 |
-
# If JSON parsing was successful
|
| 795 |
punctuated_text = result_json.get("punctuated", original_text)
|
| 796 |
translated_text = result_json.get("translated", "")
|
| 797 |
|
|
@@ -808,12 +822,10 @@ def process_segment_with_gpt(segment, source_lang, target_lang, model="gpt-4", o
|
|
| 808 |
"translated": translated_text
|
| 809 |
}
|
| 810 |
except Exception as e:
|
| 811 |
-
# Log the full traceback using exc_info=True for better debugging
|
| 812 |
logger.error(
|
| 813 |
-
f"❌ An unexpected error occurred
|
| 814 |
-
exc_info=True
|
| 815 |
)
|
| 816 |
-
# Return the original segment with an empty translated text on error
|
| 817 |
return {
|
| 818 |
"start": segment["start"],
|
| 819 |
"end": segment["end"],
|
|
|
|
| 737 |
"""
|
| 738 |
Processes a single text segment: restores punctuation and translates using an OpenAI GPT model.
|
| 739 |
"""
|
|
|
|
| 740 |
if openai_client is None:
|
| 741 |
segment_identifier = f"{segment.get('start', 'N/A')}-{segment.get('end', 'N/A')}"
|
| 742 |
logger.error(f"❌ OpenAI client was not provided for segment {segment_identifier}. Cannot process.")
|
|
|
|
| 766 |
|
| 767 |
try:
|
| 768 |
logger.debug(f"Sending request to OpenAI model '{model}' for segment {segment_id}...")
|
| 769 |
+
response = openai_client.chat.completions.create(
|
| 770 |
model=model,
|
| 771 |
messages=[{"role": "user", "content": prompt}],
|
| 772 |
temperature=0.3
|
| 773 |
)
|
| 774 |
content = response.choices[0].message.content.strip()
|
| 775 |
+
|
| 776 |
+
# --- NEW LOGIC: Clean markdown code block fences from the response ---
|
| 777 |
+
cleaned_content = content
|
| 778 |
+
if content.startswith("```") and content.endswith("```"):
|
| 779 |
+
# Attempt to find the actual JSON object within the markdown fence
|
| 780 |
+
json_start_index = content.find('{')
|
| 781 |
+
json_end_index = content.rfind('}')
|
| 782 |
+
|
| 783 |
+
if json_start_index != -1 and json_end_index != -1 and json_end_index > json_start_index:
|
| 784 |
+
cleaned_content = content[json_start_index : json_end_index + 1]
|
| 785 |
+
logger.debug(f"Removed markdown fences for segment {segment_id}. Extracted JSON portion.")
|
| 786 |
+
else:
|
| 787 |
+
logger.warning(
|
| 788 |
+
f"⚠️ Content starts/ends with '```' but a valid JSON object ({{...}}) was not found within "
|
| 789 |
+
f"fences for segment {segment_id}. Attempting to parse raw content. Raw content: '{content}'"
|
| 790 |
+
)
|
| 791 |
+
# --- END NEW LOGIC ---
|
| 792 |
+
|
| 793 |
logger.debug(
|
| 794 |
+
f"Attempting to parse JSON for segment {segment_id}. "
|
| 795 |
+
f"Content for parsing preview: '{cleaned_content[:200]}{'...' if len(cleaned_content) > 200 else ''}'"
|
| 796 |
)
|
| 797 |
|
| 798 |
result_json = {}
|
| 799 |
try:
|
| 800 |
+
result_json = json.loads(cleaned_content)
|
|
|
|
| 801 |
except json.JSONDecodeError as e:
|
| 802 |
logger.warning(
|
| 803 |
f"⚠️ Failed to parse JSON response for segment {segment_id}. Error: {e}. "
|
| 804 |
+
f"Content attempted to parse: '{cleaned_content}'" # Log cleaned content here
|
| 805 |
)
|
|
|
|
| 806 |
punctuated_text = original_text
|
| 807 |
+
translated_text = "" # Return empty translated text on parsing failure
|
| 808 |
else:
|
|
|
|
| 809 |
punctuated_text = result_json.get("punctuated", original_text)
|
| 810 |
translated_text = result_json.get("translated", "")
|
| 811 |
|
|
|
|
| 822 |
"translated": translated_text
|
| 823 |
}
|
| 824 |
except Exception as e:
|
|
|
|
| 825 |
logger.error(
|
| 826 |
+
f"❌ An unexpected error occurred for segment {segment_id}: {e}",
|
| 827 |
+
exc_info=True # This logs the full traceback
|
| 828 |
)
|
|
|
|
| 829 |
return {
|
| 830 |
"start": segment["start"],
|
| 831 |
"end": segment["end"],
|