Spaces:

gabrix00
/

grammarllm

Runtime error

App Files Community

Gabriele Tuccio committed on Jun 9, 2025

Commit

cbb9121

1 Parent(s): 4e0abc9

update

Browse files

Files changed (2) hide show

app.py +197 -71
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -371,8 +371,36 @@ def generate_text(model, tokenizer, text, logit_processor, streamer, max_new_tok
         raise RuntimeError(f"Errore nella generazione del testo: {e}")
-def run_grammarllm(prompt, productions_json, regex_json):
     setup_logging()
     # Parsing productions
@@ -381,75 +409,113 @@ def run_grammarllm(prompt, productions_json, regex_json):
     except json.JSONDecodeError:
         return "Errore: JSON productions non valido.", None
-    # Parsing regex_dict
     try:
-        regex_raw = json.loads(regex_json)
         regex_dict = {key: re.compile(pattern) for key, pattern in regex_raw.items()}
-    except json.JSONDecodeError:
-        return "Errore: JSON regex non valido.", None
     except re.error as e:
         return f"Errore nella compilazione regex: {str(e)}", None
     try:
-        tokenizer = AutoTokenizer.from_pretrained("gpt2")
-        model = AutoModelForCausalLM.from_pretrained("gpt2")
         pars_table, map_terminal_tokens = get_parsing_table_and_map_tt(
-            tokenizer,
-            productions=productions,
             regex_dict=regex_dict,
         )
         LogitProcessor, Streamer = generate_grammar_parameters(tokenizer, pars_table, map_terminal_tokens)
         output = generate_text(model, tokenizer, prompt, LogitProcessor, Streamer)
         temp_dir = "./temp"
         zip_path = temp_dir + ".zip"
-        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
-            for root, dirs, files in os.walk(temp_dir):
-                for file in files:
-                    file_path = os.path.join(root, file)
-                    arcname = os.path.relpath(file_path, temp_dir)
-                    zipf.write(file_path, arcname)
         return output, zip_path
     except Exception as e:
         return f"Errore durante l'inferenza: {str(e)}", None
 default_grammars = {
     "Default Grammar": json.dumps({
-      "S*": ["<<positive>> A", "<<negative>> B", "<<neutral>> C"],
-      "A": ["<<happy>> D", "<<peaceful>> E", "<<joyful>> F"],
-      "B": ["<<sad>>", "<<angry>>", "<<frustrated>>"],
-      "C": ["<<calm>>", "<<indifferent>>", "<<unemotional>>"],
-      "D": ["<<enthusiastic>>"],
-      "E": ["<<content>>"],
-      "F": ["<<excited>>"]
     }, indent=4),
     "Other example": json.dumps({
-      'S*': ["<<(>> A B", "<<negligent>> V", '<<indifferent>>'],
-      'A': ["number", "letters", "ε"],
-      'B': ['<<)>> letters R'],
-      'R': ['C', 'D'],
-      'C': ['<<calm>>', '<<indifferent>>', '<<unemotional>>'],
-      'D': ['<<angry>>', '<<frustrated>>'],
-      'V': ["<<option>>"],
     }, indent=4),
 }
-default_regex_json = json.dumps({
-  "regex_alfanum": "[a-zA-Z0-9]+",
-  "regex_letters": "[a-zA-Z]+",
-  "regex_number": "\\d+",
-  "regex_decimal": "\\d+([.,]\\d+)?",
-  "regex_var": "[a-zA-Z_][a-zA-Z0-9_]*",
-  "regex_)": "\\)",
-  "regex_(": "\\("
-}, indent=4)
 def update_productions(grammar_choice):
     # Aggiorna textbox productions al cambio preset
@@ -458,39 +524,98 @@ def update_productions(grammar_choice):
 def load_file(file_obj):
     if file_obj is None:
-        return ""
     try:
-        content = file_obj.read().decode("utf-8")
-        # opzionale: validare JSON?
-        json.loads(content)
         return content
     except Exception as e:
         return f"Errore nel caricamento file: {str(e)}"
-with gr.Blocks() as demo:
-    prompt_input = gr.Textbox(label="Inserisci prompt testuale")
     with gr.Row():
-        grammar_choice = gr.Dropdown(
-            list(default_grammars.keys()),
-            label="Scegli Productions (JSON)",
-            value="Default Grammar",
-            interactive=True,
-            elem_id="grammar_choice"
-        )
-        productions_upload = gr.File(label="Carica file Productions (JSON)", file_types=['.json'])
-    productions_text = gr.Textbox(label="Productions (JSON)", lines=10, value=default_grammars["Default Grammar"])
     with gr.Row():
-        regex_upload = gr.File(label="Carica file Regex_dict (JSON)", file_types=['.json'])
-    regex_text = gr.Textbox(label="Inserisci regex_dict (JSON)", lines=10, value=default_regex_json)
-    output_text = gr.Textbox(label="Output generato")
-    zip_file = gr.File(label="Scarica ZIP")
     # Callback: quando cambio dropdown, aggiorno productions_text
     grammar_choice.change(
@@ -506,20 +631,21 @@ with gr.Blocks() as demo:
         outputs=productions_text,
     )
-    # Callback: quando carico file regex, aggiorno regex_text
-    regex_upload.upload(
-        fn=load_file,
-        inputs=regex_upload,
-        outputs=regex_text,
-    )
     # Al submit del form chiamo run_grammarllm
-    submit_btn = gr.Button("Genera output")
     submit_btn.click(
         fn=run_grammarllm,
-        inputs=[prompt_input, productions_text, regex_text],
         outputs=[output_text, zip_file],
     )
 if __name__ == "__main__":

         raise RuntimeError(f"Errore nella generazione del testo: {e}")
+import gradio as gr
+import json
+import re
+import os
+import zipfile
+import spaces
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+# Assumendo che queste funzioni esistano nel tuo modulo
+# from your_module import get_parsing_table_and_map_tt, generate_grammar_parameters, generate_text, setup_logging
+def setup_logging():
+    # Implementa il tuo setup di logging qui
+    pass
+def get_parsing_table_and_map_tt(tokenizer, productions, regex_dict):
+    # Implementa la tua logica qui
+    pass
+def generate_grammar_parameters(tokenizer, pars_table, map_terminal_tokens):
+    # Implementa la tua logica qui
+    pass
+def generate_text(model, tokenizer, prompt, LogitProcessor, Streamer):
+    # Implementa la tua logica qui
+    pass
+@spaces.GPU
+def run_grammarllm(prompt, productions_json, model_choice):
     setup_logging()
     # Parsing productions
     except json.JSONDecodeError:
         return "Errore: JSON productions non valido.", None
+    # Regex fissa, non caricata dall'utente
+    regex_raw = {
+        "regex_alfanum": "[a-zA-Z0-9]+",
+        "regex_letters": "[a-zA-Z]+",
+        "regex_number": "\\d+",
+        "regex_decimal": "\\d+([.,]\\d+)?",
+        "regex_var": "[a-zA-Z_][a-zA-Z0-9_]*",
+        "regex_)": "\\)",
+        "regex_(": "\\("
+    }
     try:
         regex_dict = {key: re.compile(pattern) for key, pattern in regex_raw.items()}
     except re.error as e:
         return f"Errore nella compilazione regex: {str(e)}", None
     try:
+        # Selezione del modello basata sulla scelta dell'utente
+        if model_choice == "GPT-2":
+            model_name = "gpt2"
+        elif model_choice == "Llama 3.2 3B":
+            model_name = "meta-llama/Llama-3.2-3B"
+        elif model_choice == "Llama 3.2 1B":
+            model_name = "meta-llama/Llama-3.2-1B"
+        else:
+            return f"Modello non supportato: {model_choice}", None
+        # Caricamento del tokenizer e del modello
+        print(f"Caricamento del modello: {model_name}")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        # Configurazione del device e dtype per ottimizzare le prestazioni
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        if model_choice.startswith("Llama"):
+            # Per i modelli Llama, usa torch_dtype=torch.float16 per risparmiare memoria
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                trust_remote_code=True
+            )
+        else:
+            # Per GPT-2
+            model = AutoModelForCausalLM.from_pretrained(model_name)
+            model = model.to(device)
+        # Aggiungi pad_token se non esiste
+        if tokenizer.pad_token is None:
+            tokenizer.pad_token = tokenizer.eos_token
         pars_table, map_terminal_tokens = get_parsing_table_and_map_tt(
+            tokenizer,
+            productions=productions,
             regex_dict=regex_dict,
         )
         LogitProcessor, Streamer = generate_grammar_parameters(tokenizer, pars_table, map_terminal_tokens)
         output = generate_text(model, tokenizer, prompt, LogitProcessor, Streamer)
+        # Creazione del file ZIP
         temp_dir = "./temp"
         zip_path = temp_dir + ".zip"
+        # Assicurati che temp_dir esista
+        if os.path.exists(temp_dir):
+            with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
+                for root, dirs, files in os.walk(temp_dir):
+                    for file in files:
+                        file_path = os.path.join(root, file)
+                        arcname = os.path.relpath(file_path, temp_dir)
+                        zipf.write(file_path, arcname)
+        else:
+            zip_path = None
+        # Libera la memoria del modello
+        del model
+        torch.cuda.empty_cache() if torch.cuda.is_available() else None
         return output, zip_path
     except Exception as e:
         return f"Errore durante l'inferenza: {str(e)}", None
 default_grammars = {
     "Default Grammar": json.dumps({
+        "S*": ["<<positive>> A", "<<negative>> B", "<<neutral>> C"],
+        "A": ["<<happy>> D", "<<peaceful>> E", "<<joyful>> F"],
+        "B": ["<<sad>>", "<<angry>>", "<<frustrated>>"],
+        "C": ["<<calm>>", "<<indifferent>>", "<<unemotional>>"],
+        "D": ["<<enthusiastic>>"],
+        "E": ["<<content>>"],
+        "F": ["<<excited>>"]
     }, indent=4),
     "Other example": json.dumps({
+        'S*': ["<<(>> A B", "<<negligent>> V", '<<indifferent>>'],
+        'A': ["number", "letters", "ε"],
+        'B': ['<<)>> letters R'],
+        'R': ['C', 'D'],
+        'C': ['<<calm>>', '<<indifferent>>', '<<unemotional>>'],
+        'D': ['<<angry>>', '<<frustrated>>'],
+        'V': ["<<option>>"],
     }, indent=4),
 }
 def update_productions(grammar_choice):
     # Aggiorna textbox productions al cambio preset
 def load_file(file_obj):
     if file_obj is None:
+        return "Errore: nessun file caricato."
     try:
+        # In newer Gradio versions, file_obj is a path string, not a file object
+        if isinstance(file_obj, str):
+            # file_obj is the file path
+            with open(file_obj, 'r', encoding='utf-8') as f:
+                content = f.read()
+        else:
+            # Fallback for older Gradio versions or different file object types
+            if hasattr(file_obj, 'name'):
+                # file_obj has a 'name' attribute containing the path
+                with open(file_obj.name, 'r', encoding='utf-8') as f:
+                    content = f.read()
+            else:
+                # Try to read directly (old behavior)
+                content = file_obj.read().decode("utf-8")
+        json.loads(content)  # controlla che sia JSON valido
         return content
     except Exception as e:
         return f"Errore nel caricamento file: {str(e)}"
+# Interfaccia Gradio migliorata
+with gr.Blocks(title="GrammarLLM - Inferenza Guidata da Grammatica") as demo:
+    gr.Markdown("# GrammarLLM - Generazione di Testo Guidata da Grammatica")
+    gr.Markdown("Genera testo strutturato utilizzando grammatiche personalizzate con supporto per GPT-2 e modelli Llama.")
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt_input = gr.Textbox(
+                label="Inserisci prompt testuale",
+                placeholder="Scrivi qui il tuo prompt...",
+                lines=3
+            )
+        with gr.Column(scale=1):
+            model_choice = gr.Dropdown(
+                choices=["GPT-2", "Llama 3.2 1B", "Llama 3.2 3B"],
+                label="Scegli Modello",
+                value="GPT-2",
+                interactive=True
+            )
     with gr.Row():
+        with gr.Column():
+            grammar_choice = gr.Dropdown(
+                list(default_grammars.keys()),
+                label="Scegli Productions (JSON)",
+                value="Default Grammar",
+                interactive=True,
+                elem_id="grammar_choice"
+            )
+        with gr.Column():
+            productions_upload = gr.File(
+                label="Carica file Productions (JSON)",
+                file_types=['.json']
+            )
+    productions_text = gr.Textbox(
+        label="Productions (JSON)",
+        lines=15,
+        value=default_grammars["Default Grammar"],
+        info="Modifica direttamente la grammatica in formato JSON"
+    )
     with gr.Row():
+        submit_btn = gr.Button("🚀 Genera Output", variant="primary", size="lg")
+        clear_btn = gr.Button("🗑️ Pulisci", variant="secondary")
+    with gr.Row():
+        with gr.Column():
+            output_text = gr.Textbox(
+                label="Output generato",
+                lines=10,
+                show_copy_button=True
+            )
+        with gr.Column():
+            zip_file = gr.File(label="📦 Scarica ZIP (se disponibile)")
+    # Informazioni sui modelli
+    with gr.Accordion("ℹ️ Informazioni sui Modelli", open=False):
+        gr.Markdown("""
+        - **GPT-2**: Modello classico, veloce e leggero
+        - **Llama 3.2 1B**: Modello più recente e performante, dimensione ridotta
+        - **Llama 3.2 3B**: Modello più grande e capace, richiede più risorse
+        *Nota: I modelli Llama utilizzano Zero GPU per l'accelerazione automatica.*
+        """)
     # Callback: quando cambio dropdown, aggiorno productions_text
     grammar_choice.change(
         outputs=productions_text,
     )
     # Al submit del form chiamo run_grammarllm
     submit_btn.click(
         fn=run_grammarllm,
+        inputs=[prompt_input, productions_text, model_choice],
         outputs=[output_text, zip_file],
+        show_progress=True
+    )
+    # Funzione per pulire i campi
+    def clear_fields():
+        return "", default_grammars["Default Grammar"], None, None
+    clear_btn.click(
+        fn=clear_fields,
+        outputs=[prompt_input, productions_text, output_text, zip_file]
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ tqdm
 transformers
 setuptools
 accelerate>=0.26.0
-gradio

 transformers
 setuptools
 accelerate>=0.26.0
+gradio
+spaces