Spaces:
Sleeping
Sleeping
| import logging | |
| import textwrap | |
| from typing import Literal, Optional | |
| import os | |
| import gradio as gr | |
| import outlines | |
| import pandas as pd | |
| import spaces | |
| import torch | |
| from outlines import generate | |
| from peft import PeftConfig, PeftModel | |
| from pydantic import BaseModel, ConfigDict | |
| from transformers import ( | |
| AutoModelForCausalLM, | |
| AutoModelForSequenceClassification, | |
| AutoTokenizer, | |
| BitsAndBytesConfig, | |
| ) | |
# Logging: INFO level so the prompt and prediction messages emitted by the
# grading functions are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Model identifiers offered for grading; each one is handed to
# ``from_pretrained`` when a grading request comes in.
AVAILABLE_MODELS = [
    # "ft-*" checkpoints
    "rshwndsz/ft-longformer-base-4096",
    "rshwndsz/ft-hermes-3-llama-3.2-3b",
    "rshwndsz/ft-phi-3.5-mini-instruct",
    "rshwndsz/ft-mistral-7b-v0.3-instruct",
    "rshwndsz/ft-phi-4",
    # "ft_paraphrased-*" checkpoints
    "rshwndsz/ft_paraphrased-hermes-3-llama-3.2-3b",
    "rshwndsz/ft_paraphrased-longformer-base-4096",
    "rshwndsz/ft_paraphrased-phi-3.5-mini-instruct",
    "rshwndsz/ft_paraphrased-mistral-7b-v0.3-instruct",
    "rshwndsz/ft_paraphrased-phi-4",
]
DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]

# Baseline runtime settings.
MODEL_ID = "rshwndsz/ft-longformer-base-4096"
DEVICE_MAP = "auto"
QUANTIZATION_BITS = None
TEMPERATURE = 0.0
# Response schema used for structured (JSON) generation: a single field,
# ``score``, restricted to the string literals "0" and "1".
# NOTE: documented with comments rather than a docstring on purpose -- a
# pydantic class docstring is emitted as the schema "description".
class ResponseModel(BaseModel):
    score: Literal["0", "1"]  # grade as a string, not an int
# Instruction preamble that format_prompt() places in front of every
# per-answer prompt.
# NOTE(review): this is runtime text fed to the models; presumably the
# fine-tuned checkpoints expect this exact wording, so it is left untouched
# (including its grammar) -- confirm before editing.
# NOTE(review): item 4 ("Grading examples") has no matching placeholder in
# PROMPT_TEMPLATE below.
SYSTEM_PROMPT = textwrap.dedent("""
You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
1. A story that was presented to participants as context
2. The question that participants were asked to answer
3. A grading scheme to evaluate the answers (Correct Responses:1, incorrect response:0, Incomplete response:0, Irrelevant:0)
4. Grading examples
5. A participant answer
Your task is to grade each answer according to the grading scheme. For each answer, you should:
1. Carefully read and understand the answer and compare it to the grading criteria
2. Assigning an score 1 or 0 for each answer.
""").strip()
# Per-answer prompt body. Placeholders filled by format_prompt():
# {story}, {question}, {grading_scheme}, {answer}. The text ends with
# "Score:" so the model's continuation is the grade.
PROMPT_TEMPLATE = textwrap.dedent("""
<Story>
{story}
</Story>
<Question>
{question}
</Question>
<GradingScheme>
{grading_scheme}
</GradingScheme>
<Answer>
{answer}
</Answer>
Score:""").strip()
def is_huggingface_space() -> bool:
    """Return True when the ``SPACE_ID`` environment variable is set.

    The presence of ``SPACE_ID`` is used as the signal that the app is
    running inside a Hugging Face Space.
    """
    return os.environ.get('SPACE_ID') is not None


# Deployment-specific overrides of the runtime settings. This must come
# AFTER the definition of is_huggingface_space(): module-level statements run
# top to bottom, so calling the function before its ``def`` raises NameError
# at import time.
if is_huggingface_space():
    # Inside a Space: run on CPU, where quantization is not used.
    DEVICE_MAP = "cpu"
    QUANTIZATION_BITS = None
else:
    DEVICE_MAP = "auto"
    QUANTIZATION_BITS = 4  # or whatever you prefer for local deployment
def get_outlines_model(
    model_id: str, device_map: str = "cpu", quantization_bits: Optional[int] = None
):
    """Load the Hugging Face model and tokenizer for ``model_id``.

    Despite the name, this returns plain ``transformers``/``peft`` objects,
    not an Outlines model wrapper.

    Args:
        model_id: Model identifier. If it contains ``"longformer"`` it is
            loaded as a sequence-classification model; otherwise it is
            treated as a PEFT adapter and loaded on top of the base causal LM
            named in its adapter config.
        device_map: Passed to ``from_pretrained`` for the base causal LM.
            Not applied to longformer models.
        quantization_bits: ``4`` or ``8`` to load the base causal LM through
            bitsandbytes; ``None`` for no quantization. Ignored when
            ``device_map == "cpu"`` and for longformer models.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Classification models take none of the quantization / device options
    # below, so handle them before building any quantization config.
    if "longformer" in model_id:
        hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
        hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
        return hf_model, hf_tokenizer

    # Skip quantization on CPU. This has to be part of the same if/elif chain
    # as the bit-width checks; as a separate statement it would be
    # overwritten by them and have no effect.
    if device_map == "cpu" or quantization_bits is None:
        quantization_config = None
    elif quantization_bits == 4:
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
    elif quantization_bits == 8:
        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    else:
        # Keep the lenient fallback (load unquantized) but make it visible.
        logger.warning(
            "Unsupported quantization_bits=%r; loading %s without quantization",
            quantization_bits,
            model_id,
        )
        quantization_config = None

    # The adapter config names the base model the adapter was trained on.
    peft_config = PeftConfig.from_pretrained(model_id)
    base_model_id = peft_config.base_model_name_or_path
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        device_map=device_map,
        quantization_config=quantization_config,
        torch_dtype=torch.bfloat16,
    )
    hf_model = PeftModel.from_pretrained(base_model, model_id)

    hf_tokenizer = AutoTokenizer.from_pretrained(
        base_model_id, use_fast=True, clean_up_tokenization_spaces=True
    )
    # Reuse the EOS token as the padding token.
    hf_tokenizer.pad_token = hf_tokenizer.eos_token
    return hf_model, hf_tokenizer
def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
    """Build the full grading prompt for one participant answer.

    Each argument is stripped of surrounding whitespace and substituted into
    ``PROMPT_TEMPLATE``; the result is appended to ``SYSTEM_PROMPT`` with one
    blank line between the two.

    Returns:
        The system prompt followed by the filled-in template.
    """
    fields = {
        "story": story,
        "question": question,
        "grading_scheme": grading_scheme,
        "answer": answer,
    }
    body = PROMPT_TEMPLATE.format(
        **{key: text.strip() for key, text in fields.items()}
    )
    return "\n\n".join((SYSTEM_PROMPT, body))
# @spaces.GPU
def label_single_response_with_model(model_id, story, question, criteria, response):
    """Grade one participant response and return the score as a string.

    Args:
        model_id: Model identifier; one containing ``"longformer"`` is run as
            a sequence classifier, anything else as a generative model with
            JSON-constrained output.
        story: Story text shown to participants.
        question: Question participants were asked.
        criteria: Grading scheme text.
        response: The participant's answer.

    Returns:
        For classifier models, the predicted class index as a string; for
        generative models, the ``score`` field of ``ResponseModel``
        (``"0"`` or ``"1"``).
    """
    prompt = format_prompt(story, question, criteria, response)
    logger.info("Prompt: %s", prompt)

    # Loading is the same for both model kinds, so do it once up front.
    model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)

    if "longformer" in model_id:
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=4096,
        )
        with torch.no_grad():
            logits = model(**inputs).logits
        predicted_class = torch.argmax(logits, dim=1).item()
        logger.info("Predicted class: %s", predicted_class)
        return str(predicted_class)

    # get_outlines_model() returns raw Hugging Face objects, but
    # generate.json() needs an Outlines model (it reads the tokenizer from
    # the model it is given), so wrap the pair first.
    # NOTE(review): no sampler is passed, so the library default is used; the
    # module-level TEMPERATURE constant is not applied here -- confirm whether
    # greedy decoding was intended.
    outlines_model = outlines.models.Transformers(model, tokenizer)
    generator = generate.json(outlines_model, ResponseModel)
    result = generator(prompt, max_tokens=20)
    logger.info("Generated result: %s", result)
    return result.score
# @spaces.GPU
def label_multi_responses_with_model(
    model_id, story, question, criteria, response_file
):
    """Grade every row of an uploaded CSV and return it with a ``score`` column.

    Args:
        model_id: Model identifier; one containing ``"longformer"`` is run as
            a sequence classifier, anything else as a generative model with
            JSON-constrained output.
        story: Story text shown to participants.
        question: Question participants were asked.
        criteria: Grading scheme text.
        response_file: Object whose ``.name`` attribute is the path of a CSV
            file with a ``response`` column.

    Returns:
        The loaded DataFrame with an added ``score`` column of string grades.

    Raises:
        ValueError: If the CSV has no ``response`` column.
    """
    df = pd.read_csv(response_file.name)
    # Explicit raise instead of ``assert``: asserts are stripped under -O.
    if "response" not in df.columns:
        raise ValueError("CSV must contain a 'response' column.")

    # Empty cells load as NaN (a float) and numeric cells as numbers; both
    # would break the ``.strip()`` calls in format_prompt(), so normalise to
    # text. The ``response`` column in ``df`` itself is left as loaded.
    responses = df["response"].fillna("").astype(str)
    prompts = [
        format_prompt(story, question, criteria, resp) for resp in responses
    ]
    if not prompts:
        # Nothing to grade: skip model loading entirely.
        df["score"] = []
        return df

    model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)

    if "longformer" in model_id:
        # Run in small batches: one forward pass over the whole file at up to
        # 4096 tokens per row can exhaust memory on large uploads.
        batch_size = 8
        scores = []
        for start in range(0, len(prompts), batch_size):
            batch = prompts[start:start + batch_size]
            inputs = tokenizer(
                batch,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=4096,
            )
            with torch.no_grad():
                logits = model(**inputs).logits
            scores.extend(str(cls) for cls in torch.argmax(logits, dim=1).tolist())
    else:
        # get_outlines_model() returns raw Hugging Face objects, but
        # generate.json() needs an Outlines model, so wrap the pair first.
        outlines_model = outlines.models.Transformers(model, tokenizer)
        generator = generate.json(outlines_model, ResponseModel)
        scores = [generator(prompt, max_tokens=20).score for prompt in prompts]

    df["score"] = scores
    return df
| # Rest of the code remains the same... |