| import spaces |
| import gradio as gr |
| import torch |
| import traceback |
|
|
| from peft import AutoPeftModelForCausalLM |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,TextStreamer |
|
|
| |
# Choose the compute device once at import time: CUDA if a GPU is visible,
# otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


# Startup diagnostics so the Space logs show what hardware was detected.
print("¿GPU disponible?", torch.cuda.is_available())
print("Número de GPUs:", torch.cuda.device_count())
|
|
| |
# Hub id of the fine-tuned planner model (presumably a PEFT adapter on
# Mistral-7B, given the AutoPeftModelForCausalLM loader used below).
model_name = "Projener/AIPlannerModel_MISTRAL-7B"


# Quantization settings: store weights in 4-bit, run compute in float16.
_bnb_settings = dict(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
quantization_config = BitsAndBytesConfig(**_bnb_settings)
|
|
# Load tokenizer and 4-bit quantized PEFT model. On any failure we fall back
# to model = None (and tokenizer = None) so the UI callback can surface a
# friendly message instead of crashing at import time.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoPeftModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
        device_map={"": 0},  # place the whole model on GPU 0
    )
    # NOTE: no .to(device) here — calling .to() on a bitsandbytes-quantized
    # model is rejected by recent transformers/accelerate versions, and
    # device_map already performs the placement.
except Exception as e:
    tokenizer = None  # previously left undefined on tokenizer-load failure
    model = None
    error_message = f"Error al cargar el modelo: {str(e)}"
    print(error_message)  # make the failure visible in the logs, not silent
|
|
| |
# Alpaca-style prompt template (instruction / input / response slots).
# NOTE(review): this template is not referenced by the visible code —
# generate_response_stream builds its own `context_prefix` string inline,
# with slightly different spacing. Kept as-is; verify before removing.
alpaca_prompt = """
### Instruction:
{}

### Input:
{}

### Response:
{}
"""
|
|
| |
@spaces.GPU(duration=120)
def generate_response_stream(start_date, end_date, total_duration, total_power, max_new_tokens):
    """Generate a photovoltaic-plant project plan from the UI inputs.

    Generator used as a Gradio callback: yields exactly one string — either
    the model's plan text or a Spanish error message.

    Args:
        start_date: Project start date (free text from the UI textbox).
        end_date: Project end date (free text from the UI textbox).
        total_duration: Total duration in working days.
        total_power: Plant power in MW.
        max_new_tokens: Generation budget passed to ``model.generate``.
    """
    # Model failed to load at startup — report instead of crashing.
    if model is None:
        yield "El modelo no pudo cargarse. Verifica la configuración de tu entorno."
        return

    instruction = (
        "Generate a project plan for constructing a photovoltaic plant always including 100 tasks. "
        "Each task has a fixed name and order. Calculate start and end dates for each task, based on the project characteristics provided, "
        "ensuring all task durations are in working days only."
    )

    project_characteristics = (
        f"Project Characteristics:\n"
        f"- start_date: {start_date}\n"
        f"- end_date: {end_date}\n"
        f"- total_duration: {total_duration} working days\n"
        f"- total_power: {total_power} MW"
    )

    # Alpaca-style prompt, built inline (matches the fine-tuning format).
    context_prefix = f"### Instruction:\n{instruction}\n### Input:\n{project_characteristics}\n### Response:\n"

    inputs = tokenizer([context_prefix], return_tensors="pt").to(device)

    try:
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],  # pass the mask the tokenizer produced
            max_new_tokens=max_new_tokens,
            use_cache=True,
        )

        # Full sequence (prompt + completion) back to text.
        decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the text after the response marker.
        if "### Response:" in decoded_output:
            response = decoded_output.split("### Response:")[1].strip()
        else:
            response = "No se encontró una respuesta válida en la salida."

        # Cap the output length shown in the UI.
        yield response[:22400]

    except Exception:
        # traceback is already imported at module level — no local re-import.
        error_details = traceback.format_exc()
        yield f"Error durante la generación:\n{error_details}"
|
|
|
|
# Gradio UI. Component creation order inside each context manager determines
# the on-screen layout, so statements must stay in this exact order.
with gr.Blocks() as demo:
    gr.Markdown("## AIPlanner Mistral 7B")

    # Project date range (free text; defaults suggest ISO YYYY-MM-DD format).
    with gr.Row():
        start_date = gr.Textbox(label="Fecha de Inicio", value="2022-04-18")
        end_date = gr.Textbox(label="Fecha de Fin", value="2023-08-18")

    # Numeric project characteristics.
    with gr.Row():
        total_duration = gr.Number(label="Duración Total (días laborales)", value=350)
        total_power = gr.Number(label="Potencia Total (MW)", value=400.0)

    # Generation budget, forwarded as max_new_tokens to model.generate.
    with gr.Row():
        max_new_tokens = gr.Slider(
            label="Tokens máximos por generación",
            minimum=64,
            maximum=2048,
            step=64,
            value=128,
            interactive=True
        )

    generate_button = gr.Button("Generar Planificación")
    output_box = gr.Textbox(label="Respuesta Generada", lines=15)

    # Wire the button to the (generator) inference callback.
    generate_button.click(
        fn=generate_response_stream,
        inputs=[start_date, end_date, total_duration, total_power, max_new_tokens],
        outputs=output_box
    )


demo.launch(share=True)
|
|
|
|