Transformers
Italian
English
semantic-search
explainable-ai
faiss
ai-ethics
responsible-ai
llm
prompt-engineering
multimodal-ai
ai-transparency
ethical-intelligence
explainable-llm
cognitive-ai
ethical-ai
scientific-retrieval
modular-ai
memory-augmented-llm
trustworthy-ai
reasoning-engine
ai-alignment
next-gen-llm
thinking-machines
open-source-ai
explainability
ai-research
semantic audit
cognitive agent
human-centered-ai
File size: 6,163 Bytes
742cc20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# © 2025 Elena Marziali — Code released under Apache 2.0 license.
# See LICENSE in the repository for details.
# Removal of this copyright is prohibited.
# === Text Translation ===
# Caching dictionary for previously translated texts
translation_cache = {}
def detect_language(text):
"""Detects the language of the loaded text."""
try:
return detect(text)
except Exception as e:
print(f"Language detection error: {e}")
return "unknown"
def translate_text(text, source_lang, target_lang):
""" Translates the text with debug output to verify correctness. """
translation_model = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
print(f"Using translation model: {translation_model}")
translator = pipeline("translation", model=translation_model)
translation = translator(text)[0]['translation_text']
print(f"Original text: {text}")
print(f"Translated text: {translation}")
return translation
def extract_text_pdf(file_name):
""" Extracts text from a PDF file. """
text = ""
with pdfplumber.open(file_name) as pdf:
for page in pdf.pages:
text += page.extract_text() + "\n"
return text.strip()
def extract_text_docx(file_name):
""" Extracts text from a DOCX file. """
doc = Document(file_name)
text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
return text.strip()
def save_docx(text, output_file_name):
""" Saves translated text into a DOCX document. """
doc = Document()
doc.add_paragraph(text)
doc.save(output_file_name)
def extract_text_csv(file_name):
""" Extracts textual content from a CSV file. """
df = pd.read_csv(file_name)
text = df.astype(str).apply(lambda x: ' '.join(x), axis=1).str.cat(sep='\n')
return text.strip()
def extract_text_tsv(file_name):
""" Extracts textual content from a TSV file. """
df = pd.read_csv(file_name, sep='\t')
text = df.astype(str).apply(lambda x: ' '.join(x), axis=1).str.cat(sep='\n')
return text.strip()
def handle_file(file_name):
""" Loads the file, detects its language, and lets the user choose a target language for translation. """
extension = file_name.split('.')[-1].lower()
if extension == "pdf":
text = extract_text_pdf(file_name)
elif extension == "docx":
text = extract_text_docx(file_name)
elif extension == "csv":
text = extract_text_csv(file_name)
elif extension == "tsv":
text = extract_text_tsv(file_name)
else:
return "Unsupported format! Use PDF, DOCX, CSV, or TSV."
original_language = detect_language(text)
print(f"The file was detected in **{original_language}**.")
# List of available languages
available_languages = ["en", "fr", "de", "es", "zh", "ja", "ar", "it"]
# Ask the user for the target language
print(f"Available languages for translation: {', '.join(available_languages)}")
target_language = input("Which language do you want the explanation in? (e.g., 'en' for English, 'fr' for French): ").strip()
if target_language not in available_languages:
print("Error: Unsupported language!")
else:
print(f"The explanation will be translated into {target_language}.")
# Ensure translation is performed
translated_text = translate_text(text, original_language, target_language)
# Save the translated file
translated_file_name = f"translated_{target_language}_{file_name}"
if extension == "pdf":
with open(translated_file_name, "w", encoding="utf-8") as f:
f.write(translated_text)
elif extension == "docx":
save_docx(translated_text, translated_file_name)
return f"Translation completed! Download the file: {translated_file_name}"
# Initialize the dictionary to store journals
journal_store = {}
def save_multilingual_journal(journal_text, journal_id, target_language):
source_language = detect_language(journal_text)
if source_language != target_language:
translated_text = translate_long_text(journal_text, source_lang=source_language, target_lang=target_language)
else:
translated_text = journal_text
journal_store[journal_id] = {
"original": journal_text,
target_language: translated_text
}
embedding = safe_encode(translated_text)
index.add(np.array(embedding, dtype=np.float32))
def translate_long_text(text, source_lang="it", target_lang="en", max_chars=400):
translation_model = f"Helsinki-NLP/opus-mt-{source_lang}-{target_lang}"
translator = pipeline("translation", model=translation_model)
blocks = [text[i:i+max_chars] for i in range(0, len(text), max_chars)]
translated = []
for block in blocks:
try:
output = translator(block)[0]['translation_text']
translated.append(output)
except Exception as e:
print(f"Error translating block: {e}")
translated.append("[Translation error]")
return "\n".join(translated)
def search_similar_journals(query, target_language, top_k=3):
query_language = detect_language(query)
if query_language != target_language:
translated_query = translate_long_text(query, source_lang=query_language, target_lang=target_language)
else:
translated_query = query
query_emb = safe_encode(translated_query)
query_emb = np.array(query_emb, dtype=np.float32)
if hasattr(index, "is_trained") and not index.is_trained:
print("FAISS index is not trained.")
return []
D, I = index.search(query_emb, top_k)
results = []
for i in I[0]:
journal = journal_store.get(i, {})
results.append(journal.get(target_language, ""))
return results
# === Valid Input Function ===
def get_valid_input(message, valid_options=None):
while True:
value = input(message).strip().lower()
if not value:
print("Error! Please enter a valid value.")
elif valid_options and value not in valid_options:
print(f"Error! You must choose from: {', '.join(valid_options)}")
else:
return value |