Spaces:
Sleeping
Sleeping
import json

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Model configuration: display name shown in the UI -> Hugging Face repo id.
MODELS = {
    "MPropositioneur V2 (Défaut)": "Zual/MPropositioneur-V2",
    "MPropositioneur V1": "Zual/MPropositioneur-V1",
}

# Cache of already-loaded (model, tokenizer) pairs, keyed by repo id.
loaded_models = {}
def get_model(model_name):
    """Return the (model, tokenizer) pair for a display name, loading lazily.

    Pairs are memoized in the module-level ``loaded_models`` cache, keyed by
    Hugging Face repo id, so each checkpoint is downloaded/loaded only once.
    """
    repo_id = MODELS[model_name]
    cached = loaded_models.get(repo_id)
    if cached is None:
        print(f"Loading {model_name} ({repo_id})...")
        tok = AutoTokenizer.from_pretrained(repo_id)
        mdl = AutoModelForCausalLM.from_pretrained(
            repo_id,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        cached = (mdl, tok)
        loaded_models[repo_id] = cached
        print(f"Loaded {model_name}.")
    return cached
def generate_propositions(texte, model_selection):
    """Decompose *texte* into atomic propositions with the selected model.

    Greedy-decodes the model on the training-time prompt format, then parses
    the output as JSON. Returns a bullet list (one proposition per line) when
    the output is a JSON list, otherwise the raw decoded text.
    """
    model, tokenizer = get_model(model_selection)

    # Exact prompt format used during training.
    prompt = f"<|im_start|>user\nAtomize: {texte}<|im_end|>\n<|im_start|>assistant\n"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=8192).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            do_sample=False,
        )

    # Decode only the newly generated tokens (skip the echoed prompt).
    prompt_len = inputs.input_ids.shape[1]
    result = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()

    # Expected output: JSON, either ["p1", ...] or [{"proposition": "p1"}, ...].
    try:
        parsed = json.loads(result)
    except json.JSONDecodeError:
        parsed = None

    if isinstance(parsed, list):
        bullets = []
        for item in parsed:
            if isinstance(item, dict) and "proposition" in item:
                bullets.append(item["proposition"])
            elif isinstance(item, str):
                bullets.append(item)
            else:
                bullets.append(str(item))
        return "\n".join(f"• {p}" for p in bullets)

    # Fallback: return the raw output when parsing fails or yields a non-list.
    return result
# --- Gradio interface ---
with gr.Blocks() as demo:
    gr.Markdown("# MPropositioneur")
    gr.Markdown(
        "Décompose une phrase ou un texte en une liste de propositions atomiques.\n\n"
        "Entraîné par distillation par Luc Pommeret au LISN (CNRS) sur [Lab-IA](https://lab-ia.fr/) en distillation sur Qwen3-0.6B."
    )

    with gr.Row():
        with gr.Column():
            # Left column: input text, model picker, submit button.
            texte_entree = gr.Textbox(lines=5, label="Texte d'entrée", placeholder="Entrez une phrase ou un passage ici...")
            choix_modele = gr.Dropdown(
                choices=list(MODELS.keys()),
                value="MPropositioneur V2 (Défaut)",
                label="Choisir la version du modèle",
            )
            bouton_decomposer = gr.Button("Décomposer")
        with gr.Column():
            # Right column: generated propositions.
            sortie_propositions = gr.Textbox(label="Propositions atomiques", lines=10)

    bouton_decomposer.click(
        fn=generate_propositions,
        inputs=[texte_entree, choix_modele],
        outputs=sortie_propositions,
    )

    gr.Examples(
        examples=[["Le chat et le chien sont dans la cuisine.", "MPropositioneur V2 (Défaut)"]],
        inputs=[texte_entree, choix_modele],
    )

if __name__ == "__main__":
    demo.launch()