from transformers import AutoTokenizer, AutoModelForCausalLM, T5Tokenizer, T5ForConditionalGeneration, pipeline
# HuggingFacePipeline lives in langchain_community in current LangChain releases
from langchain_community.llms import HuggingFacePipeline
import os
import torch

def load_model_and_pipeline(model_info, quantization=4, is_t5=False, use_local=True):
    # Check if the model is local or should be downloaded from Hugging Face
    # if use_local:
    #     path = f"models/{model_info}"
    #     if not os.path.exists(path):
    #         print(f"Local model not found at {path}. Downloading from Hugging Face...")
    #         use_local = False  # Fallback to Hugging Face download if local not found
    # if not use_local:
    #     # Replace model_info with the corresponding Hugging Face repo name
    #     hf_model_map = {
    #         "zephyr-7b-beta": "HuggingFaceH4/zephyr-7b-beta",
    #         "llama-3-8b": "NousResearch/Meta-Llama-3-8B",
    #         "mistral-7b": "unsloth/mistral-7b-instruct-v0.3",
    #         "phi-3-mini": "microsoft/Phi-3-mini-4k-instruct",
    #         "flan-t5-base": "google/flan-t5-base"
    #     }
    #     path = hf_model_map.get(model_info.split("_")[1], model_info)

    if is_t5:
        # Seq2seq checkpoints such as Flan-T5 cannot be loaded as causal LMs;
        # use the T5 classes and the matching text2text-generation task.
        tokenizer = T5Tokenizer.from_pretrained(model_info)
        model = T5ForConditionalGeneration.from_pretrained(model_info, device_map='auto')
        task = "text2text-generation"
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_info, token=True)
        # quantization may arrive as an int or a string, so normalise it:
        # 8 / "8" selects 8-bit loading, anything else falls back to 4-bit.
        if str(quantization) == "8":
            model = AutoModelForCausalLM.from_pretrained(
                model_info,
                device_map='auto',
                torch_dtype=torch.float16,
                token=True,
                load_in_8bit=True
            )
        else:
            model = AutoModelForCausalLM.from_pretrained(
                model_info,
                device_map='auto',
                torch_dtype=torch.float16,
                token=True,
                load_in_4bit=True
            )
        task = "text-generation"

    pipe = pipeline(
        task,
        model=model,
        tokenizer=tokenizer,
        # The model is already dispatched by device_map='auto' and carries its
        # own dtype, so neither is repeated here.
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id
    )

    # Sampling behaviour is configured on the pipeline itself; model_kwargs only
    # applies when HuggingFacePipeline loads a model on its own.
    llm = HuggingFacePipeline(pipeline=pipe)
    return tokenizer, model, llm

def zephyr_model(model_info, quantization, use_local=True):
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)

def llama_model(model_info, quantization, use_local=True):
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)

def mistral_model(model_info, quantization, use_local=True):
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)

def phi_model(model_info, quantization, use_local=True):
    return load_model_and_pipeline(model_info, quantization, use_local=use_local)

def flant5_model(model_info, use_local=True):
    return load_model_and_pipeline(model_info, is_t5=True, use_local=use_local)
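
# Example usage (sketch): every wrapper above forwards to load_model_and_pipeline,
# so loading an 8-bit Mistral model looks like this (repo name taken from the
# commented hf_model_map; assumes a CUDA device and bitsandbytes installed):
#
#     tokenizer, model, llm = mistral_model("unsloth/mistral-7b-instruct-v0.3", quantization=8)
#     print(llm.invoke("Summarise retrieval-augmented generation in one sentence."))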


import pandas as pd
from datasets import Dataset

def calculate_rag_metrics(model_ques_ans_gen, llm_model, embedding_model="BAAI/bge-base-en-v1.5"):
    from ragas import evaluate
    from ragas.metrics import (
        faithfulness, answer_correctness, answer_similarity,
        answer_relevancy, context_recall, context_precision
    )
    from langchain_community.embeddings import HuggingFaceEmbeddings

    # Build a column-wise dictionary from the model_ques_ans_gen records
    data_samples = {
        'question': [item['question'] for item in model_ques_ans_gen],
        'answer': [item['answer'] for item in model_ques_ans_gen],
        'contexts': [item['contexts'] for item in model_ques_ans_gen],
        'ground_truths': [item['ground_truths'] for item in model_ques_ans_gen]
    }

    # Convert the dictionary to a pandas DataFrame
    rag_df = pd.DataFrame(data_samples)

    # Convert the DataFrame to a HuggingFace Dataset
    rag_eval_dataset = Dataset.from_pandas(rag_df)

    # ragas expects metric objects (imported above), not metric-name strings
    metrics = [
        answer_correctness, answer_similarity,
        answer_relevancy, faithfulness,
        context_recall, context_precision
    ]

    # Perform the evaluation using the provided LLM; ragas expects an embeddings
    # object rather than a bare model name, so wrap it with LangChain's
    # HuggingFaceEmbeddings (requires sentence-transformers).
    result = evaluate(
        rag_eval_dataset,
        metrics=metrics,
        llm=llm_model,
        embeddings=HuggingFaceEmbeddings(model_name=embedding_model)
    )
    # Return the per-sample scores as a pandas DataFrame
    return result.to_pandas()
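

# Illustrative end-to-end sketch (hypothetical sample record; assumes GPU access
# and a Hugging Face token for gated checkpoints):
if __name__ == "__main__":
    sample_records = [{
        'question': "What does RAG stand for?",
        'answer': "Retrieval-augmented generation.",
        'contexts': ["RAG, retrieval-augmented generation, grounds answers in retrieved documents."],
        'ground_truths': ["Retrieval-augmented generation."],
    }]
    _, _, zephyr_llm = zephyr_model("HuggingFaceH4/zephyr-7b-beta", quantization=4)
    scores_df = calculate_rag_metrics(sample_records, zephyr_llm)
    print(scores_df)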