Spaces:

ActiveYixiao
/

automatic_coding

Sleeping

App Files Files Community

ActiveYixiao committed on Aug 29, 2025

Commit

bd59e63

verified ·

1 Parent(s): 56ef409

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -52

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
 import textwrap
-from typing import Literal, Optional
 import gradio as gr
 import outlines
@@ -35,7 +35,7 @@ AVAILABLE_MODELS = [
 DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
 DEVICE_MAP = "auto"
-QUANTIZATION_BITS = None
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
@@ -69,9 +69,9 @@ class ResponseModel(BaseModel):
     score: Literal["0", "1"]
-def get_outlines_model(
     model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
-):
     if quantization_bits == 4:
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
@@ -85,9 +85,14 @@ def get_outlines_model(
         quantization_config = None
     if "longformer" in model_id:
-        hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
-        hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
-        return hf_model, hf_tokenizer
     peft_config = PeftConfig.from_pretrained(model_id)
     base_model_id = peft_config.base_model_name_or_path
@@ -97,13 +102,13 @@ def get_outlines_model(
         device_map=device_map,
         quantization_config=quantization_config,
     )
-    hf_model = PeftModel.from_pretrained(base_model, model_id)
-    hf_tokenizer = AutoTokenizer.from_pretrained(
         base_model_id, use_fast=True, clean_up_tokenization_spaces=True
     )
-    model = outlines.from_transformers(hf_model, hf_tokenizer)
-    return model
 def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
@@ -121,52 +126,82 @@ def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -
 def label_single_response_with_model(model_id, story, question, criteria, response):
     prompt = format_prompt(story, question, criteria, response)
-    if "longformer" in model_id:
-        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        inputs = tokenizer(response, return_tensors="pt", truncation=True, padding=True)
-        with torch.no_grad():
-            logits = model(**inputs).logits
-        if logits.shape[1] == 1:
-            # Regression-style: apply sigmoid threshold at 0.5
-            score = int(torch.sigmoid(logits).item() > 0.5)
         else:
-            # Classification-style: argmax over 2 labels
-            score = torch.argmax(logits, dim=1).item()
-        return str(score)
-    else:
-        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        generator = Generator(model, ResponseModel)  # pass schema
-        result = generator(prompt)
-        return result.score
 @spaces.GPU
 def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
-    df = pd.read_csv(response_file.name)
-    assert "response" in df.columns, "CSV must contain a 'response' column."
-    prompts = [
-        format_prompt(story, question, criteria, resp) for resp in df["response"]
-    ]
-    if "longformer" in model_id:
-        model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        inputs = tokenizer(df["response"].tolist(), return_tensors="pt", truncation=True, padding=True)
-        with torch.no_grad():
-            logits = model(**inputs).logits
-        if logits.shape[1] == 1:
-            scores = [str(int(torch.sigmoid(l) > 0.5)) for l in logits]
         else:
-            scores = [str(cls) for cls in torch.argmax(logits, dim=1).tolist()]
-    else:
-        model = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
-        generator = Generator(model, ResponseModel)
-        results = [generator(p) for p in prompts]
-        scores = [r.score for r in results]
-    df["score"] = scores
-    return df
 with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
@@ -208,4 +243,4 @@ with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
             )
 if __name__ == "__main__":
-    iface.launch(share=True)

 import logging
 import textwrap
+from typing import Literal, Optional, Tuple, Union
 import gradio as gr
 import outlines
 DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
 DEVICE_MAP = "auto"
+QUANTIZATION_BITS = 4  # Changed from None to 4 for better compatibility
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
     score: Literal["0", "1"]
+def get_model_and_tokenizer(
     model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
+) -> Tuple[Union[AutoModelForCausalLM, AutoModelForSequenceClassification], AutoTokenizer]:
     if quantization_bits == 4:
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
         quantization_config = None
     if "longformer" in model_id:
+        model = AutoModelForSequenceClassification.from_pretrained(
+            model_id,
+            device_map=device_map,
+            quantization_config=quantization_config  # Added quantization for consistency
+        )
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.pad_token = tokenizer.eos_token  # Add padding token
+        return model, tokenizer
     peft_config = PeftConfig.from_pretrained(model_id)
     base_model_id = peft_config.base_model_name_or_path
         device_map=device_map,
         quantization_config=quantization_config,
     )
+    model = PeftModel.from_pretrained(base_model, model_id)
+    tokenizer = AutoTokenizer.from_pretrained(
         base_model_id, use_fast=True, clean_up_tokenization_spaces=True
     )
+    tokenizer.pad_token = tokenizer.eos_token  # Ensure padding token is set
+    return model, tokenizer
 def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
 def label_single_response_with_model(model_id, story, question, criteria, response):
     prompt = format_prompt(story, question, criteria, response)
+    try:
+        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
+        if "longformer" in model_id:
+            # Process with Longformer
+            inputs = tokenizer(
+                prompt,
+                return_tensors="pt",
+                truncation=True,
+                padding=True,
+                max_length=4096
+            )
+            with torch.no_grad():
+                logits = model(**inputs).logits
+            if logits.shape[1] == 1:
+                # Regression-style
+                score = int(torch.sigmoid(logits).item() > 0.5)
+            else:
+                # Classification-style
+                score = torch.argmax(logits, dim=1).item()
+            return str(score)
         else:
+            # Process with other models using outlines
+            outlines_model = outlines.from_transformers(model, tokenizer)
+            generator = Generator(outlines_model, ResponseModel)
+            result = generator(prompt)
+            return result.score
+    except Exception as e:
+        logger.error(f"Error processing request: {str(e)}")
+        return f"Error: {str(e)}"
 @spaces.GPU
 def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
+    try:
+        df = pd.read_csv(response_file.name)
+        assert "response" in df.columns, "CSV must contain a 'response' column."
+        model, tokenizer = get_model_and_tokenizer(model_id, DEVICE_MAP, QUANTIZATION_BITS)
+        if "longformer" in model_id:
+            # Process with Longformer
+            prompts = [
+                format_prompt(story, question, criteria, resp)
+                for resp in df["response"]
+            ]
+            inputs = tokenizer(
+                prompts,
+                return_tensors="pt",
+                truncation=True,
+                padding=True,
+                max_length=4096
+            )
+            with torch.no_grad():
+                logits = model(**inputs).logits
+            if logits.shape[1] == 1:
+                scores = [str(int(torch.sigmoid(l) > 0.5)) for l in logits]
+            else:
+                scores = [str(cls) for cls in torch.argmax(logits, dim=1).tolist()]
         else:
+            # Process with other models
+            outlines_model = outlines.from_transformers(model, tokenizer)
+            generator = Generator(outlines_model, ResponseModel)
+            scores = []
+            for resp in df["response"]:
+                prompt = format_prompt(story, question, criteria, resp)
+                result = generator(prompt)
+                scores.append(result.score)
+        df["score"] = scores
+        return df
+    except Exception as e:
+        logger.error(f"Error processing batch: {str(e)}")
+        return pd.DataFrame({"error": [str(e)]})
 with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
             )
 if __name__ == "__main__":
+    iface.launch(share=True)