Spaces:
Build error
Build error
| from selfcheckgpt.modeling_selfcheck import SelfCheckNLI, SelfCheckBERTScore, SelfCheckNgram | |
| import torch | |
| import spacy | |
| import os | |
| import gradio as gr | |
# spaCy English pipeline; split_sent() uses it for sentence segmentation.
nlp = spacy.load("en_core_web_sm")

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The three SelfCheckGPT scorers used by the UI handlers.
selfcheck_nli = SelfCheckNLI(device=device)
selfcheck_bertscore = SelfCheckBERTScore(rescale_with_baseline=True)
selfcheck_ngram = SelfCheckNgram(n=1)  # n=1 -> unigram, n=2 -> bigram, ...
from openai import AzureOpenAI

# Azure OpenAI connection settings, all read from environment variables.
openai_key = os.getenv("OPENAI_API_KEY")
resource_url = os.getenv("OPENAI_API_RESOURCEURL")
api_version = os.getenv("OPENAI_API_VERSION")
# The endpoint is the same OPENAI_API_RESOURCEURL value; both names are kept
# so existing references to either one continue to work.
api_url = resource_url

# Custom name chosen for the model deployment. The request helpers call
# client.chat.completions.create, so this must name a deployment that
# supports the chat-completions API.
deployment_name = os.getenv("model_name")

# Single shared client (the original built an identical client twice).
client = AzureOpenAI(
    api_key=openai_key,
    api_version=api_version,
    azure_endpoint=api_url,
)
def _chat_completion(prompt, temperature):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text placed in the one user message of the request.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=deployment_name,  # the Azure deployment name, not a model id
        temperature=temperature,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


def generate_response(prompt):
    """Return the reply to `prompt` generated at temperature 0.0."""
    return _chat_completion(prompt, temperature=0.0)


def generate_response_high_temp(prompt):
    """Return the reply to `prompt` generated at temperature 1.0."""
    return _chat_completion(prompt, temperature=1.0)
def create_dataset(prompt):
    """Return three separately generated high-temperature replies to `prompt`.

    Each reply comes from its own call to generate_response_high_temp; the
    three texts are returned as a 3-tuple.
    """
    first, second, third = (generate_response_high_temp(prompt) for _ in range(3))
    return first, second, third
def split_sent(sentence):
    """Split `sentence` into sentences with the spaCy pipeline `nlp`.

    Returns a list holding the whitespace-stripped text of each sentence
    span, in document order.
    """
    pieces = []
    for span in nlp(sentence).sents:
        pieces.append(span.text.strip())
    return pieces
def _unwrap_response(sentence):
    """Recover the plain response text from the response textbox value.

    generating_samples() returns the response as a one-element list, so the
    textbox normally holds that list's string form (e.g. "['some text']").
    Parsing it as a literal restores the real text, including characters the
    list repr escaped. Any other content (for instance text the user typed
    or edited by hand) is returned as-is, only stripped of outer whitespace.
    """
    import ast

    if not sentence:
        return ""
    text = sentence.strip()
    if text.startswith("[") and text.endswith("]"):
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            parsed = None
        if (
            isinstance(parsed, list)
            and len(parsed) == 1
            and isinstance(parsed[0], str)
        ):
            return parsed[0]
    return text


def func_selfcheck_nli(sentence, s1, s2, s3):
    """Score the response against three samples with SelfCheck-NLI.

    Args:
        sentence: Value of the response textbox.
        s1, s2, s3: The three sampled passages.

    Returns:
        A message reporting hallucination when the score exceeds 0.35,
        otherwise a message reporting true information.
    """
    passage = _unwrap_response(sentence)
    # The whole response is passed as a single "sentence", so predict() is
    # presumably returning a one-element result -- TODO confirm.
    score = selfcheck_nli.predict(
        sentences=[passage],
        sampled_passages=[s1, s2, s3],
    )
    if score > 0.35:
        return f"The LLM is hallucinating with selfcheck nli score of {score}"
    return f"The LLM is generating true information with selfcheck nli score of {score}"


def func_selfcheckbert(sentence, s1, s2, s3):
    """Score the response against three samples with SelfCheck-BERTScore.

    Args:
        sentence: Value of the response textbox.
        s1, s2, s3: The three sampled passages.

    Returns:
        A message reporting hallucination when the score exceeds 0.6,
        otherwise a message reporting true information.
    """
    passage = _unwrap_response(sentence)
    sent_scores_bertscore = selfcheck_bertscore.predict(
        sentences=[passage],
        sampled_passages=[s1, s2, s3],
    )
    if sent_scores_bertscore > 0.6:
        return f"The LLM is hallucinating with selfcheck BERT score of {sent_scores_bertscore}"
    return f"The LLM is generating true information with selfcheck BERT score of {sent_scores_bertscore}"


def func_selfcheckngram(sentence, s1, s2, s3):
    """Score the response against three samples with SelfCheck-Ngram.

    The response is split into sentences with split_sent(), and the
    document-level ``avg_max_neg_logprob`` value is used as the score.

    Args:
        sentence: Value of the response textbox.
        s1, s2, s3: The three sampled passages.

    Returns:
        A message reporting hallucination when the score exceeds 6,
        otherwise a message reporting true information.
    """
    passage = _unwrap_response(sentence)
    sent_scores_ngram = selfcheck_ngram.predict(
        sentences=split_sent(passage),
        passage=passage,
        sampled_passages=[s1, s2, s3],
    )
    avg_max_neg_logprob = sent_scores_ngram['doc_level']['avg_max_neg_logprob']
    if avg_max_neg_logprob > 6:
        return f"The LLM is hallucinating with selfcheck ngram score of {avg_max_neg_logprob}"
    return f"The LLM is generating true information with selfcheck ngram score of {avg_max_neg_logprob}"
def generating_samples(prompt):
    """Generate the main response and three comparison samples for a topic.

    The topic in `prompt` is placed into a fixed passage-request template.
    The main response comes from generate_response() and the three samples
    from create_dataset(), all using the same templated prompt.

    Returns:
        A 4-tuple ``([response], s1, s2, s3)``. The response is wrapped in a
        one-element list; the self-check handlers unwrap the textbox value
        that results from it.
    """
    topic_prompt = f"This is a Wikipedia passage on the topic of '{prompt}' in 100 words"
    main_answer = generate_response(topic_prompt)
    samples = create_dataset(topic_prompt)
    return ([main_answer], *samples)
# Gradio UI: a prompt box, the generated response, three sampled passages,
# one output box, and buttons wiring each self-check scorer to that output.
# NOTE(review): the container nesting below was reconstructed from a listing
# that had lost its indentation -- confirm the intended layout.
with gr.Blocks() as demo:
    # Heading; the closing tag was previously a second opening <h1>.
    gr.Markdown("\n<h1> LLM Hackathon : LLM Hallucination Detector </h1>\n")

    with gr.Column():
        prompt = gr.Textbox(label="prompt")
    with gr.Column():
        sentence = gr.Textbox(label="response")
    with gr.Row():
        s1 = gr.Textbox(label="sample1")
        s2 = gr.Textbox(label="sample2")
        s3 = gr.Textbox(label="sample3")
    with gr.Column():
        score = gr.Textbox(label="output")

    # Fill the response box and the three sample boxes from the prompt.
    output_response = gr.Button("Generate response")
    output_response.click(
        fn=generating_samples,
        inputs=prompt,
        outputs=[sentence, s1, s2, s3],
    )

    # Each scorer reads the same four textboxes and writes to `score`.
    with gr.Row(equal_height=True):
        self_check_nli_button = gr.Button("self check nli")
        self_check_nli_button.click(
            fn=func_selfcheck_nli,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )
        selfcheckbert_button = gr.Button("self check Bert")
        selfcheckbert_button.click(
            fn=func_selfcheckbert,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )
        self_check_ngram_button = gr.Button("self check ngram")
        self_check_ngram_button.click(
            fn=func_selfcheckngram,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )

demo.launch()