updated for resume translation
Browse files- app.py +43 -16
- sample_resume.pdf +0 -0
app.py
CHANGED
|
@@ -16,7 +16,7 @@ dwani.api_base = os.getenv("DWANI_API_BASE_URL")
|
|
| 16 |
logger.debug("DWANI_API_KEY: %s", "Set" if dwani.api_key else "Not set")
|
| 17 |
logger.debug("DWANI_API_BASE_URL: %s", dwani.api_base)
|
| 18 |
|
| 19 |
-
# Language options for dropdowns
|
| 20 |
language_options = [
|
| 21 |
("English", "eng_Latn"),
|
| 22 |
("Kannada", "kan_Knda"),
|
|
@@ -60,18 +60,35 @@ def parse_page_numbers(pages_str):
|
|
| 60 |
|
| 61 |
def results_to_markdown(results):
|
| 62 |
"""
|
| 63 |
-
Convert the results dictionary into a Markdown formatted string
|
|
|
|
| 64 |
"""
|
| 65 |
md_lines = []
|
| 66 |
for page, content in results.items():
|
| 67 |
-
md_lines.append(f"## {page}")
|
| 68 |
if "error" in content:
|
| 69 |
-
md_lines.append(f"**Error:** {content['error']}")
|
| 70 |
else:
|
| 71 |
-
md_lines.append(
|
| 72 |
-
md_lines.append(
|
| 73 |
-
md_lines.append(
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
md_lines.append("\n---\n")
|
| 76 |
return "\n".join(md_lines)
|
| 77 |
|
|
@@ -108,24 +125,34 @@ def process_pdf(pdf_file, pages_str, prompt, src_lang, tgt_lang):
|
|
| 108 |
logger.debug("Calling API with file: %s, pages: %s, prompt: %s, src_lang: %s, tgt_lang: %s",
|
| 109 |
file_path, pages, prompt, src_lang_code, tgt_lang_code)
|
| 110 |
|
| 111 |
-
system_prompt = "Do not return any asterisk"
|
| 112 |
-
|
| 113 |
results = {}
|
| 114 |
for page_number in pages:
|
| 115 |
try:
|
| 116 |
-
result = dwani.Documents.
|
| 117 |
file_path=file_path,
|
| 118 |
-
prompt=f"{prompt} {system_prompt}",
|
| 119 |
page_number=page_number,
|
| 120 |
src_lang=src_lang_code,
|
| 121 |
tgt_lang=tgt_lang_code
|
| 122 |
)
|
| 123 |
logger.debug("API response for page %d: %s", page_number, result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
results[f"Page {page_number}"] = {
|
| 125 |
-
"
|
| 126 |
-
"
|
| 127 |
-
"
|
| 128 |
-
|
|
|
|
| 129 |
}
|
| 130 |
except dwani.exceptions.DhwaniAPIError as e:
|
| 131 |
logger.error("Dhwani API error on page %d: %s", page_number, str(e))
|
|
|
|
| 16 |
logger.debug("DWANI_API_KEY: %s", "Set" if dwani.api_key else "Not set")
|
| 17 |
logger.debug("DWANI_API_BASE_URL: %s", dwani.api_base)
|
| 18 |
|
| 19 |
+
# Language options for dropdowns (display name and code)
|
| 20 |
language_options = [
|
| 21 |
("English", "eng_Latn"),
|
| 22 |
("Kannada", "kan_Knda"),
|
|
|
|
| 60 |
|
| 61 |
def _longest_backtick_run(text):
    """Return the length of the longest run of consecutive backticks in *text*."""
    longest = current = 0
    for char in text:
        current = current + 1 if char == "`" else 0
        if current > longest:
            longest = current
    return longest


def results_to_markdown(results):
    """
    Convert the results dictionary into a Markdown formatted string.

    Each key of *results* becomes a level-2 heading. The value is a dict that
    either carries an "error" entry, which is rendered as a bold error line,
    or the entries "Original Text", "Response", "Processed Page" and
    "Translated Response". Missing entries are treated as empty strings, and
    so are entries whose value is None.

    The original text is wrapped in a fenced code block; the translated text
    is wrapped in <pre> tags so its line structure is preserved. Every page
    section ends with a horizontal rule.

    Returns the sections joined with newlines, or "" for an empty dict.
    """
    # Imported locally so this function does not depend on the module's
    # import block.
    import html

    def as_text(value):
        # An entry that is present but None must not break concatenation.
        return "" if value is None else str(value)

    md_lines = []
    for page, content in results.items():
        md_lines.append(f"## {page}\n")
        if "error" in content:
            md_lines.append(f"**Error:** {content['error']}\n")
        else:
            original = as_text(content.get('Original Text', ''))
            # The fence must be longer than any backtick run in the text,
            # otherwise the text itself would close the code block early.
            fence = "`" * max(3, _longest_backtick_run(original) + 1)
            md_lines.append("**Original Text:**\n\n" + fence)
            md_lines.append(original + "\n")
            md_lines.append(fence + "\n")

            response_text = as_text(content.get('Response', ''))
            if response_text:
                md_lines.append("Response:\n\n" + response_text + "\n")

            md_lines.append("**Processed Page:** " + str(content.get('Processed Page', '')) + "\n")

            translated = as_text(content.get('Translated Response', ''))

            # Normalize newlines
            translated = translated.replace('\r\n', '\n').replace('\r', '\n')

            # Content between <pre> tags is raw HTML, so escape markup
            # characters to keep the text from being read as tags or entities.
            translated = html.escape(translated, quote=False)

            # Use <pre> tags to preserve the line structure
            md_lines.append("**Translated Response:**\n\n<pre>")
            md_lines.append(translated)
            md_lines.append("</pre>")

        md_lines.append("\n---\n")
    return "\n".join(md_lines)
|
| 94 |
|
|
|
|
| 125 |
logger.debug("Calling API with file: %s, pages: %s, prompt: %s, src_lang: %s, tgt_lang: %s",
|
| 126 |
file_path, pages, prompt, src_lang_code, tgt_lang_code)
|
| 127 |
|
|
|
|
|
|
|
| 128 |
results = {}
|
| 129 |
for page_number in pages:
|
| 130 |
try:
|
| 131 |
+
result = dwani.Documents.run_extract(
|
| 132 |
file_path=file_path,
|
|
|
|
| 133 |
page_number=page_number,
|
| 134 |
src_lang=src_lang_code,
|
| 135 |
tgt_lang=tgt_lang_code
|
| 136 |
)
|
| 137 |
logger.debug("API response for page %d: %s", page_number, result)
|
| 138 |
+
|
| 139 |
+
# New response format: result contains 'pages' list
|
| 140 |
+
page_data = None
|
| 141 |
+
for p in result.get('pages', []):
|
| 142 |
+
if p.get('processed_page') == page_number:
|
| 143 |
+
page_data = p
|
| 144 |
+
break
|
| 145 |
+
|
| 146 |
+
if page_data is None:
|
| 147 |
+
results[f"Page {page_number}"] = {"error": "No data returned for this page"}
|
| 148 |
+
continue
|
| 149 |
+
|
| 150 |
results[f"Page {page_number}"] = {
|
| 151 |
+
"Processed Page": page_data.get("processed_page", "N/A"),
|
| 152 |
+
"Original Text": page_data.get("page_content", "N/A"),
|
| 153 |
+
"Translated Response": page_data.get("translated_content", "N/A"),
|
| 154 |
+
# The old 'Response' key is not in new data; set empty string
|
| 155 |
+
"Response": ""
|
| 156 |
}
|
| 157 |
except dwani.exceptions.DhwaniAPIError as e:
|
| 158 |
logger.error("Dhwani API error on page %d: %s", page_number, str(e))
|
sample_resume.pdf
ADDED
|
Binary file (74.3 kB). View file
|
|
|