Spaces:

ActiveYixiao
/

automatic_coding

Sleeping

App Files Files Community

ActiveYixiao committed on Aug 29, 2025

Commit

e6a7fa6

verified ·

1 Parent(s): c38da00

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -86

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import logging
 import textwrap
 from typing import Literal, Optional
 import gradio as gr
 import outlines
 import pandas as pd
@@ -19,8 +18,9 @@ from transformers import (
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 DEVICE_MAP = "auto"
-QUANTIZATION_BITS = None
 TEMPERATURE = 0.0
 AVAILABLE_MODELS = [
@@ -37,12 +37,40 @@ AVAILABLE_MODELS = [
 ]
 DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
-# Use a simpler prompt format that might be closer to your training data
 PROMPT_TEMPLATE = textwrap.dedent("""
-Story: {story}
-Question: {question}
-Grading Scheme: {grading_scheme}
-Answer: {answer}
 Score:""").strip()
@@ -51,14 +79,9 @@ class ResponseModel(BaseModel):
     score: Literal["0", "1"]
-# Cache models to avoid reloading on every request
-_model_cache = {}
-def get_model_and_tokenizer(model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = None):
-    if model_id in _model_cache:
-        return _model_cache[model_id]
     if quantization_bits == 4:
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
@@ -72,90 +95,86 @@ def get_model_and_tokenizer(model_id: str, device_map: str = "auto", quantizatio
         quantization_config = None
     if "longformer" in model_id:
-        model = AutoModelForSequenceClassification.from_pretrained(model_id)
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        result = (model, tokenizer, "classification")
-    else:
-        # For other models, use the same approach as your original script
-        peft_config = PeftConfig.from_pretrained(model_id)
-        base_model_id = peft_config.base_model_name_or_path
-        model = AutoModelForCausalLM.from_pretrained(
-            base_model_id,
-            device_map=device_map,
-            quantization_config=quantization_config,
-        )
-        model = PeftModel.from_pretrained(model, model_id)
-        tokenizer = AutoTokenizer.from_pretrained(
-            base_model_id, use_fast=True, clean_up_tokenization_spaces=True
-        )
-        # Convert to outlines model
-        outlines_model = outlines.models.Transformers(model, tokenizer=tokenizer)
-        result = (outlines_model, tokenizer, "generation")
-    _model_cache[model_id] = result
-    return result
 def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
-    return PROMPT_TEMPLATE.format(
         story=story.strip(),
         question=question.strip(),
         grading_scheme=grading_scheme.strip(),
         answer=answer.strip(),
     )
 @spaces.GPU
 def label_single_response_with_model(model_id, story, question, criteria, response):
-    try:
-        prompt = format_prompt(story, question, criteria, response)
-        model, tokenizer, model_type = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        if model_type == "classification":
-            # For Longformer models
-            inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
-            with torch.no_grad():
-                logits = model(**inputs).logits
-            predicted_class = torch.argmax(logits, dim=1).item()
-            return str(predicted_class)
-        else:
-            # For generative models - using the new Outlines API
-            generator = outlines.generate.json(model, ResponseModel)
-            result = generator(prompt)
-            return result.score
-    except Exception as e:
-        logger.error(f"Error in label_single_response_with_model: {str(e)}")
-        return "Error: " + str(e)
 @spaces.GPU
-def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
-    try:
-        df = pd.read_csv(response_file.name)
-        assert "response" in df.columns, "CSV must contain a 'response' column."
-        model, tokenizer, model_type = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        prompts = [format_prompt(story, question, criteria, resp) for resp in df["response"]]
-        if model_type == "classification":
-            inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
-            with torch.no_grad():
-                logits = model(**inputs).logits
-            predicted_classes = torch.argmax(logits, dim=1).tolist()
-            scores = [str(cls) for cls in predicted_classes]
-        else:
-            # For generative models - using the new Outlines API
-            generator = outlines.generate.json(model, ResponseModel)
-            results = generator(prompts)
-            scores = [r.score for r in results]
-        df["score"] = scores
-        return df
-    except Exception as e:
-        logger.error(f"Error in label_multi_responses_with_model: {str(e)}")
-        return f"Error: {str(e)}"
 def single_response_ui(model_id):
@@ -176,7 +195,10 @@ def single_response_ui(model_id):
 def multi_response_ui(model_id):
     return gr.Interface(
-        fn=lambda story, question, criteria, response_file: label_multi_responses_with_model(
             model_id.value, story, question, criteria, response_file
         ),
         inputs=[
@@ -196,7 +218,7 @@ with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
     model_selector = gr.Dropdown(
         label="Select Model",
         choices=AVAILABLE_MODELS,
-        value=DEFAULT_MODEL_ID,
     )
     selected_model_id = gr.State(value=DEFAULT_MODEL_ID)

 import logging
 import textwrap
 from typing import Literal, Optional
 import gradio as gr
 import outlines
 import pandas as pd
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+MODEL_ID = "rshwndsz/ft-longformer-base-4096"
 DEVICE_MAP = "auto"
+QUANTIZATION_BITS = 4
 TEMPERATURE = 0.0
 AVAILABLE_MODELS = [
 ]
 DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
+# Exact SYSTEM_PROMPT from training data
+SYSTEM_PROMPT = textwrap.dedent("""
+You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
+1. A story that was presented to participants as context
+2. The question that participants were asked to answer
+3. A grading scheme to evaluate the answers (Correct Responses:1, incorrect response:0, Incomplete response:0, Irrelevant:0)
+4. Grading examples
+5. A participant answer
+Your task is to grade each answer according to the grading scheme. For each answer, you should:
+1. Carefully read and understand the answer and compare it to the grading criteria
+2. Assigning an score 1 or 0 for each answer.
+""").strip()
+# Exact PROMPT_TEMPLATE from training data
 PROMPT_TEMPLATE = textwrap.dedent("""
+<Story>
+{story}
+</Story>
+<Question>
+{question}
+</Question>
+<GradingScheme>
+{grading_scheme}
+</GradingScheme>
+<Answer>
+{answer}
+</Answer>
 Score:""").strip()
     score: Literal["0", "1"]
+def get_outlines_model(
+    model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
+):
     if quantization_bits == 4:
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
         quantization_config = None
     if "longformer" in model_id:
+        hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
+        return hf_model, hf_tokenizer
+    peft_config = PeftConfig.from_pretrained(model_id)
+    base_model_id = peft_config.base_model_name_or_path
+    base_model = AutoModelForCausalLM.from_pretrained(
+        base_model_id,
+        device_map=device_map,
+        quantization_config=quantization_config,
+    )
+    hf_model = PeftModel.from_pretrained(base_model, model_id)
+    hf_tokenizer = AutoTokenizer.from_pretrained(
+        base_model_id, use_fast=True, clean_up_tokenization_spaces=True
+    )
+    # Updated for new outlines API
+    model = outlines.models.Transformers(hf_model, hf_tokenizer)
+    return model
 def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
     """Build the full grading prompt for one participant answer.

     Each argument is stripped of surrounding whitespace, substituted into
     ``PROMPT_TEMPLATE``, and the result is appended to ``SYSTEM_PROMPT``
     after a single newline. Per the original author's note, this layout
     and concatenation mirror what was used during training.

     Args:
         story: Story text presented to participants.
         question: Question participants were asked.
         grading_scheme: Grading criteria text.
         answer: The participant's answer.

     Returns:
         ``SYSTEM_PROMPT``, a newline, then the filled-in template.
     """
     raw_fields = {
         "story": story,
         "question": question,
         "grading_scheme": grading_scheme,
         "answer": answer,
     }
     filled = PROMPT_TEMPLATE.format(
         **{key: text.strip() for key, text in raw_fields.items()}
     )
     # System prompt first, then the per-item section, newline-separated.
     return "\n".join((SYSTEM_PROMPT, filled))
 @spaces.GPU
 def label_single_response_with_model(model_id, story, question, criteria, response):
     """Grade a single participant response with the selected model.

     Args:
         model_id: Model identifier. Ids containing "longformer" are run as
             sequence classifiers; any other id goes through Outlines
             JSON-constrained generation against ``ResponseModel``.
         story: Story text presented to participants.
         question: Question participants were asked.
         criteria: Grading scheme inserted into the prompt.
         response: The participant's answer to grade.

     Returns:
         For "longformer" ids, the argmax class index as a string. For all
         other ids, the ``score`` attribute of the generated object.
     """
     text = format_prompt(story, question, criteria, response)
     # For "longformer" ids the loader returns a (model, tokenizer) pair;
     # otherwise it returns a single Outlines-wrapped model.
     loaded = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
     if "longformer" not in model_id:
         structured = outlines.generate.json(loaded, ResponseModel)
         return structured(text).score
     classifier, tok = loaded
     encoded = tok(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         class_logits = classifier(**encoded).logits
     return str(torch.argmax(class_logits, dim=1).item())
 @spaces.GPU
 def label_multi_responses_with_model(
     model_id, story, question, criteria, response_file
 ):
     """Grade every response in an uploaded CSV and return the scored table.

     Args:
         model_id: Model identifier. Ids containing "longformer" are run as
             sequence classifiers; any other id goes through Outlines
             JSON-constrained generation against ``ResponseModel``.
         story: Story text presented to participants.
         question: Question participants were asked.
         criteria: Grading scheme inserted into each prompt.
         response_file: Uploaded file object; its ``name`` attribute is
             read as the path of the CSV.

     Returns:
         The DataFrame read from the CSV with an added ``score`` column
         holding one label per row.

     Raises:
         ValueError: If the CSV has no ``response`` column.
     """
     df = pd.read_csv(response_file.name)
     # Raise explicitly instead of using ``assert``: assertions are removed
     # under ``python -O``, which would let a malformed upload through.
     if "response" not in df.columns:
         raise ValueError("CSV must contain a 'response' column.")
     if df.empty:
         # Nothing to grade, so skip model loading and batching entirely.
         df["score"] = []
         return df
     # pandas reads blank cells as NaN and numeric-looking answers as
     # numbers; format_prompt calls ``.strip()``, so pass strings only.
     answers = ["" if pd.isna(resp) else str(resp) for resp in df["response"]]
     prompts = [format_prompt(story, question, criteria, resp) for resp in answers]
     if "longformer" in model_id:
         model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
         with torch.no_grad():
             logits = model(**inputs).logits
         predicted_classes = torch.argmax(logits, dim=1).tolist()
         scores = [str(cls) for cls in predicted_classes]
     else:
         model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
         generator = outlines.generate.json(model, ResponseModel)
         results = generator(prompts)
         scores = [r.score for r in results]
     df["score"] = scores
     return df
 def single_response_ui(model_id):
 def multi_response_ui(model_id):
     return gr.Interface(
+        fn=lambda story,
+        question,
+        criteria,
+        response_file: label_multi_responses_with_model(
             model_id.value, story, question, criteria, response_file
         ),
         inputs=[
     model_selector = gr.Dropdown(
         label="Select Model",
         choices=AVAILABLE_MODELS,
+        value=AVAILABLE_MODELS[0],
     )
     selected_model_id = gr.State(value=DEFAULT_MODEL_ID)