Ali0044 committed on
Commit
9abc92a
·
verified ·
1 Parent(s): c388ee4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +38 -21
README.md CHANGED
@@ -65,20 +65,23 @@ For a detailed example of how to load and use this model, please refer to the Co
65
  ## Ethical Considerations
66
 
67
  As with any language model, care should be taken when deploying this for real-world applications. Potential biases present in the training data could be reflected in the translations. It's important to monitor its output and ensure fair and accurate use.
68
- ## 🚀 How to use
69
-
70
- ```python
71
  from huggingface_hub import snapshot_download
72
  import tensorflow as tf
73
- import numpy as np
74
- import os
75
  from tensorflow.keras.preprocessing.text import tokenizer_from_json
76
  from tensorflow.keras.preprocessing.sequence import pad_sequences
 
 
77
 
78
- repo_id = "{repo_id}"
79
- local_dir = snapshot_download(repo_id=repo_id)
 
 
80
 
81
- model = tf.keras.models.load_model(os.path.join(local_dir, "Translation_model_for_hf.keras"))
 
82
 
83
  with open(os.path.join(local_dir, "tokenizer/eng_tokenizer.json"), "r", encoding="utf-8") as f:
84
  eng_tokenizer = tokenizer_from_json(f.read())
@@ -86,18 +89,32 @@ with open(os.path.join(local_dir, "tokenizer/eng_tokenizer.json"), "r", encoding
86
  with open(os.path.join(local_dir, "tokenizer/ar_tokenizer.json"), "r", encoding="utf-8") as f:
87
  ar_tokenizer = tokenizer_from_json(f.read())
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def translate(sentences):
90
- seq = eng_tokenizer.texts_to_sequences(sentences)
91
- padded = pad_sequences(seq, maxlen=model.input_shape[1], padding='post')
92
- preds = model.predict(padded)
93
  preds = np.argmax(preds, axis=-1)
94
-
95
- results = []
96
- for s in preds:
97
- text = [ar_tokenizer.index_word[i] for i in s if i != 0]
98
- results.append(' '.join(text))
99
- return results
100
-
101
- # Example
102
- print(translate(["Hello, how are you?"]))
103
- """
 
65
  ## Ethical Considerations
66
 
67
  As with any language model, care should be taken when deploying this for real-world applications. Potential biases present in the training data could be reflected in the translations. It's important to monitor its output and ensure fair and accurate use.
68
+ # ==============================
69
+ # LinguaFlow – English to Arabic Translator
70
+ # ==============================
71
  from huggingface_hub import snapshot_download
72
  import tensorflow as tf
 
 
73
  from tensorflow.keras.preprocessing.text import tokenizer_from_json
74
  from tensorflow.keras.preprocessing.sequence import pad_sequences
75
+ import os
76
+ import numpy as np
77
 
78
# ------------------------------
# Download the model snapshot and load the trained translation model
# ------------------------------
repo_id = "Ali0044/LinguaFlow"
local_dir = snapshot_download(repo_id=repo_id)  # if the repo is private, pass token="HF_TOKEN"

model_path = os.path.join(local_dir, "Translation_model_for_hf.keras")
model = tf.keras.models.load_model(model_path)
85
 
86
def _load_tokenizer(relative_path):
    # Read one tokenizer JSON file from the downloaded snapshot.
    with open(os.path.join(local_dir, relative_path), "r", encoding="utf-8") as handle:
        return tokenizer_from_json(handle.read())

eng_tokenizer = _load_tokenizer("tokenizer/eng_tokenizer.json")
ar_tokenizer = _load_tokenizer("tokenizer/ar_tokenizer.json")
91
 
92
+ # ------------------------------
93
+ # ------------------------------
94
def encode_text(texts, tokenizer, max_len):
    """Convert raw strings into integer sequences padded to a fixed length.

    Args:
        texts: iterable of input strings.
        tokenizer: fitted Keras tokenizer (provides ``texts_to_sequences``).
        max_len: target sequence length; shorter sequences are post-padded.

    Returns:
        A 2-D array of shape (len(texts), max_len).
    """
    as_sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(as_sequences, maxlen=max_len, padding='post')
97
+
98
def sequences_to_text(sequences, tokenizer):
    """Map padded index sequences back to space-joined word strings.

    Index 0 is treated as padding and skipped; every other index is looked
    up in ``tokenizer.index_word`` (a missing index raises ``KeyError``).
    """
    return [
        ' '.join(tokenizer.index_word[token] for token in row if token != 0)
        for row in sequences
    ]
104
+
105
+ # ------------------------------
106
+ # ------------------------------
107
def translate(sentences):
    """Translate a list of English sentences into Arabic strings.

    Encodes the input with the English tokenizer, runs the model, picks the
    highest-probability token at every position, and decodes the resulting
    index sequences with the Arabic tokenizer.
    """
    encoded = encode_text(sentences, eng_tokenizer, model.input_shape[1])
    probabilities = model.predict(encoded)
    token_ids = np.argmax(probabilities, axis=-1)
    return sequences_to_text(token_ids, ar_tokenizer)
112
+
113
# ------------------------------
# Quick demo: translate a couple of sample sentences
# ------------------------------
sample_texts = ["Hello, how are you?", "I love machine learning!"]
translations = translate(sample_texts)

for en, ar in zip(sample_texts, translations):
    print(f"English: {en}")
    print(f"Arabic : {ar}")