Ali0044 committed on
Commit
47aa0b5
·
verified ·
1 Parent(s): 9abc92a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +20 -37
README.md CHANGED
@@ -65,23 +65,20 @@ For a detailed example of how to load and use this model, please refer to the Co
65
  ## Ethical Considerations
66
 
67
  As with any language model, care should be taken when deploying this for real-world applications. Potential biases present in the training data could be reflected in the translations. It's important to monitor its output and ensure fair and accurate use.
68
- # ==============================
69
- # LinguaFlow – English to Arabic Translator
70
- # ==============================
71
  from huggingface_hub import snapshot_download
72
  import tensorflow as tf
 
 
73
  from tensorflow.keras.preprocessing.text import tokenizer_from_json
74
  from tensorflow.keras.preprocessing.sequence import pad_sequences
75
- import os
76
- import numpy as np
77
 
78
- # ------------------------------
79
- # ------------------------------
80
  repo_id = "Ali0044/LinguaFlow"
81
- local_dir = snapshot_download(repo_id=repo_id) # لو الريبو خاص، استخدم token="HF_TOKEN"
82
 
83
- model_path = os.path.join(local_dir, "Translation_model_for_hf.keras")
84
- model = tf.keras.models.load_model(model_path)
85
 
86
  with open(os.path.join(local_dir, "tokenizer/eng_tokenizer.json"), "r", encoding="utf-8") as f:
87
  eng_tokenizer = tokenizer_from_json(f.read())
@@ -89,32 +86,18 @@ with open(os.path.join(local_dir, "tokenizer/eng_tokenizer.json"), "r", encoding
89
  with open(os.path.join(local_dir, "tokenizer/ar_tokenizer.json"), "r", encoding="utf-8") as f:
90
  ar_tokenizer = tokenizer_from_json(f.read())
91
 
92
- # ------------------------------
93
- # ------------------------------
94
- def encode_text(texts, tokenizer, max_len):
95
- seq = tokenizer.texts_to_sequences(texts)
96
- return pad_sequences(seq, maxlen=max_len, padding='post')
97
-
98
- def sequences_to_text(sequences, tokenizer):
99
- texts = []
100
- for seq in sequences:
101
- temp = [tokenizer.index_word[idx] for idx in seq if idx != 0]
102
- texts.append(' '.join(temp))
103
- return texts
104
-
105
- # ------------------------------
106
- # ------------------------------
107
  def translate(sentences):
108
- X_input = encode_text(sentences, eng_tokenizer, model.input_shape[1])
109
- preds = model.predict(X_input)
 
110
  preds = np.argmax(preds, axis=-1)
111
- return sequences_to_text(preds, ar_tokenizer)
112
-
113
- # ------------------------------
114
- # ------------------------------
115
- sample_texts = ["Hello, how are you?", "I love machine learning!"]
116
- translations = translate(sample_texts)
117
-
118
- for en, ar in zip(sample_texts, translations):
119
- print(f"English: {en}")
120
- print(f"Arabic : {ar}")
 
65
  ## Ethical Considerations
66
 
67
  As with any language model, care should be taken when deploying this for real-world applications. Potential biases present in the training data could be reflected in the translations. It's important to monitor its output and ensure fair and accurate use.
68
+ ## 🚀 How to use
69
+
70
+ ```python
71
  from huggingface_hub import snapshot_download
72
  import tensorflow as tf
73
+ import numpy as np
74
+ import os
75
  from tensorflow.keras.preprocessing.text import tokenizer_from_json
76
  from tensorflow.keras.preprocessing.sequence import pad_sequences
 
 
77
 
 
 
78
  repo_id = "Ali0044/LinguaFlow"
79
+ local_dir = snapshot_download(repo_id=repo_id)
80
 
81
+ model = tf.keras.models.load_model(os.path.join(local_dir, "Translation_model_for_hf.keras"))
 
82
 
83
  with open(os.path.join(local_dir, "tokenizer/eng_tokenizer.json"), "r", encoding="utf-8") as f:
84
  eng_tokenizer = tokenizer_from_json(f.read())
 
86
  with open(os.path.join(local_dir, "tokenizer/ar_tokenizer.json"), "r", encoding="utf-8") as f:
87
  ar_tokenizer = tokenizer_from_json(f.read())
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def translate(sentences):
90
+ seq = eng_tokenizer.texts_to_sequences(sentences)
91
+ padded = pad_sequences(seq, maxlen=model.input_shape[1], padding='post')
92
+ preds = model.predict(padded)
93
  preds = np.argmax(preds, axis=-1)
94
+
95
+ results = []
96
+ for s in preds:
97
+ text = [ar_tokenizer.index_word[i] for i in s if i != 0]
98
+ results.append(' '.join(text))
99
+ return results
100
+
101
+ # Example
102
+ print(translate(["Hello, how are you?"]))
103
+ """