Update README.md
Browse files
README.md
CHANGED
|
@@ -5,14 +5,66 @@ This is a fine-tuned version of `Llama-3.1` for text summarization in Darija (Mo
|
|
| 5 |
## Usage
|
| 6 |
|
| 7 |
```python
|
|
|
|
|
|
|
| 8 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 9 |
|
| 10 |
model = AutoModelForCausalLM.from_pretrained("your_username/my-finetuned-darija-model")
|
| 11 |
tokenizer = AutoTokenizer.from_pretrained("your_username/my-finetuned-darija-model")
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
|
|
|
| 5 |
## Usage
|
| 6 |
|
| 7 |
```python
|
| 8 |
+
!pip install gradio
|
| 9 |
+
import gradio as gr
|
| 10 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 11 |
|
| 12 |
# Hub repository id shared by the model weights and the tokenizer, so the
# two can never point at different checkpoints by accident.
MODEL_ID = "your_username/my-finetuned-darija-model"

model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 14 |
|
| 15 |
+
def summarize_text(article_content):
    """Generate a summary for a Darija article with the fine-tuned model.

    Relies on the module-level ``model`` and ``tokenizer`` objects.

    Args:
        article_content: Raw article text (e.g. the content of the Gradio
            input box).

    Returns:
        The generated summary as a stripped string, or an empty string when
        the input is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the (expensive) generation call entirely.
    if not article_content or not article_content.strip():
        return ""

    # Preprocessing: remove stray markup fragments ('</' and '>').
    cleaned_article_content = article_content.replace('</', '').replace('>', '')

    # Wrap the article in the instruction prompt.
    input_text = f"""Below is an article written in Darija. Write a concise and accurate summary:

### Article:
{cleaned_article_content}
### Summary:
"""

    # Tokenize and move the tensors to wherever the model actually lives,
    # instead of assuming a CUDA device is present.
    inputs = tokenizer([input_text], return_tensors="pt", truncation=True).to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)

    # A causal LM returns prompt + completion. Drop the prompt tokens before
    # decoding so only the newly generated text is kept; splitting the decoded
    # text on "### Summary:" would break if the article contained that marker.
    prompt_length = inputs["input_ids"].shape[1]
    summary = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Remove any leftover end-of-sequence marker that survived decoding.
    return summary.replace('</s>', '').strip()
|
| 44 |
+
|
| 45 |
+
# Gradio interface: a title, a short instruction paragraph, an input/output
# pair of text boxes side by side, and a button that runs the summarizer.
with gr.Blocks() as demo:
    gr.Markdown(value="## Résumé de texte en Darija")
    gr.Markdown(
        value=(
            "Entrez le contenu de l'article en Darija dans la zone ci-dessous. "
            "Un résumé sera généré et affiché."
        )
    )

    # Input on the left, generated summary on the right.
    with gr.Row():
        input_box = gr.Textbox(
            lines=10,
            label="Contenu de l'article",
            placeholder="Entrez uniquement le contenu de l'article ici...",
        )
        output_box = gr.Textbox(
            lines=5,
            label="Résumé généré",
            placeholder="Le résumé apparaîtra ici...",
        )

    # Clicking the button feeds the input box to summarize_text and writes
    # the returned string into the output box.
    generate_button = gr.Button(value="Générer le résumé")
    generate_button.click(fn=summarize_text, inputs=input_box, outputs=output_box)

# Launch the app.
demo.launch()
|
| 70 |
|