Update app.py

app.py CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 from datetime import datetime
 import os
 import json
@@ -10,6 +10,7 @@ import requests
 from bs4 import BeautifulSoup
 from concurrent.futures import ThreadPoolExecutor
 import re
+from threading import Thread

 # --- Logger configuration ---
 logging.basicConfig(
@@ -22,11 +23,9 @@ logging.basicConfig(
 )

 # --- Hugging Face authentication ---
-# Make sure the HF_TOKEN environment variable is set to your Hugging Face token
-# Otherwise, you can set it directly here
-# os.environ["HF_TOKEN"] = "your_huggingface_token"

-login(token=os.environ["HF_TOKEN"])
+login(token=os.environ["HF_TOKEN"])
+
 # Global variables
 project_state = {
     "AgentManager": {"structured_summary": None},
@@ -36,11 +35,11 @@ project_state = {
 }

 # Load the model
-manager_model_name = "meta-llama/Llama-3.
+manager_model_name = "meta-llama/Llama-3.2-3B-Instruct"
 manager_model = AutoModelForCausalLM.from_pretrained(
     manager_model_name,
     device_map="auto",
-    torch_dtype=
+    torch_dtype=torch.bfloat16
 )
 manager_tokenizer = AutoTokenizer.from_pretrained(manager_model_name)

@@ -98,8 +97,9 @@ def clean_output(response, system_prompt, conversation_context):
     response = response.replace(system_prompt, "").replace(conversation_context, "").strip()
     return response

+# Main function, with streaming
 def agent_manager(chat_history, user_input):
-    """Handles user and assistant interactions."""
+    """Handles user and assistant interactions, with streaming."""
     # Prepare the variables context
     variables_context = get_variables_context()

@@ -117,35 +117,41 @@ def agent_manager(chat_history, user_input):
     # Append the current user input
     chat_history.append({"user": user_input, "assistant": ""})

-    #
-
-
-
+    # Prepare the inputs and the streamer
+    inputs = manager_tokenizer(system_prompt + "\nUtilisateur : " + user_input, return_tensors="pt").to(manager_model.device)
+    attention_mask = inputs.attention_mask
+    streamer = TextIteratorStreamer(manager_tokenizer, skip_special_tokens=True)
+
+    # Run generation in a background thread
+    generation_kwargs = dict(
+        inputs=inputs.input_ids,
+        attention_mask=attention_mask,
         max_new_tokens=MAX_NEW_TOKENS,
         temperature=TEMPERATURE,
         top_p=TOP_P,
         eos_token_id=manager_tokenizer.eos_token_id,
-        pad_token_id=manager_tokenizer.pad_token_id
+        pad_token_id=manager_tokenizer.pad_token_id,
+        streamer=streamer
     )
-
-
-    # Clean the output
-    response = clean_output(response, system_prompt, conversation_context)
+    generation_thread = Thread(target=manager_model.generate, kwargs=generation_kwargs)
+    generation_thread.start()

-
-
+    partial_response = ""
+    for new_text in streamer:
+        partial_response += new_text
+        clean_partial_response = clean_output(partial_response, system_prompt, conversation_context)
+        chat_history[-1]["assistant"] = clean_partial_response
+        yield clean_partial_response, json.dumps(chat_history), get_variables_context()

-
-
-# Gradio interface
+# Gradio interface with streaming
 def gradio_interface(user_input, chat_history):
     chat_history = json.loads(chat_history) if chat_history else []
-
-    variables_context
-
+    response_generator = agent_manager(chat_history, user_input)
+    for response, updated_chat_history, variables_context in response_generator:
+        yield response, updated_chat_history, variables_context

 with gr.Blocks() as demo:
-    gr.Markdown("## AgentManager - Test d'Interactions Collaboratives")
+    gr.Markdown("## AgentManager - Test d'Interactions Collaboratives avec Streaming")
     with gr.Row():
         with gr.Column():
             user_input = gr.Textbox(label="Entrée utilisateur", placeholder="Entrez une requête ou une instruction.")
@@ -159,4 +165,4 @@ with gr.Blocks() as demo:

 # Launch the interface
 if __name__ == "__main__":
-    demo.launch()
+    demo.queue().launch()
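
Note on the streaming pattern this commit adopts: TextIteratorStreamer turns generate() into a producer/consumer pair, with generate() running in a background thread while the caller iterates over decoded text fragments as they arrive. Below is a minimal, self-contained sketch of that pattern; the small public model "distilgpt2" and the sampling values are illustrative stand-ins, not the Space's actual configuration:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "distilgpt2"  # stand-in for the gated Llama model the Space uses
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def stream_reply(prompt):
    """Yield the reply incrementally as the model generates it."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # skip_prompt=True keeps the prompt out of the stream, so the partial
    # text does not need the prompt stripped after the fact.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,  # input_ids and attention_mask
        max_new_tokens=64,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,  # distilgpt2 has no pad token
        streamer=streamer,
    )
    # generate() blocks until finished, so it runs in a background thread
    # while the main thread drains the streamer's queue.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # cumulative text, like partial_response in the commit
    thread.join()

if __name__ == "__main__":
    for text in stream_reply("Once upon a time"):
        print(text)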
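The switch from demo.launch() to demo.queue().launch() on the last line is what makes the generator handlers actually stream: in the Gradio versions this Space appears to target, a handler that yields is only delivered to the browser incrementally when the queue is enabled. A toy sketch independent of any model:

import time

import gradio as gr

def echo_stream(message):
    """Yield a growing prefix of the input to simulate token streaming."""
    acc = ""
    for ch in message:
        acc += ch
        time.sleep(0.05)  # pretend each character is a generated token
        yield acc

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    btn = gr.Button("Send")
    btn.click(echo_stream, inputs=inp, outputs=out)

if __name__ == "__main__":
    # Without queue(), older Gradio versions run the handler to completion
    # and only display the final yield.
    demo.queue().launch()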