Update app.py
app.py CHANGED
@@ -5,12 +5,17 @@ from typing import Literal, Optional
 import gradio as gr
 import outlines
 import pandas as pd
+import spaces
 import torch
 from outlines import Generator
 from peft import PeftConfig, PeftModel
 from pydantic import BaseModel, ConfigDict
-from transformers import
-
+from transformers import (
+    AutoModelForCausalLM,
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+)
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -20,24 +25,29 @@ DEVICE_MAP = "auto"
 QUANTIZATION_BITS = None
 TEMPERATURE = 0.0
 
-AVAILABLE_MODELS =
-    "
-    "
-
-
+AVAILABLE_MODELS = [
+    "rshwndsz/ft-longformer-base-4096",
+    "rshwndsz/ft-hermes-3-llama-3.2-3b",
+    "rshwndsz/ft-phi-3.5-mini-instruct",
+    "rshwndsz/ft-mistral-7b-v0.3-instruct",
+    "rshwndsz/ft-phi-4",
+    "rshwndsz/ft_paraphrased-hermes-3-llama-3.2-3b",
+    "rshwndsz/ft_paraphrased-longformer-base-4096",
+    "rshwndsz/ft_paraphrased-phi-3.5-mini-instruct",
+    "rshwndsz/ft_paraphrased-mistral-7b-v0.3-instruct",
+    "rshwndsz/ft_paraphrased-phi-4",
+]
+DEFAULT_MODEL_ID = AVAILABLE_MODELS[0]
 
 
 SYSTEM_PROMPT = textwrap.dedent("""
 You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
-
 1. A story that was presented to participants as context
 2. The question that participants were asked to answer
 3. A grading scheme to evaluate the answers (Correct Responses:1, incorrect response:0, Incomplete response:0, Irrelevant:0)
 4. Grading examples
 5. A participant answer
-
 Your task is to grade each answer according to the grading scheme. For each answer, you should:
-
 1. Carefully read and understand the answer and compare it to the grading criteria
 2. Assigning an score 1 or 0 for each answer.
 """).strip()
@@ -46,19 +56,15 @@ PROMPT_TEMPLATE = textwrap.dedent("""
 <Story>
 {story}
 </Story>
-
 <Question>
 {question}
 </Question>
-
 <GradingScheme>
 {grading_scheme}
 </GradingScheme>
-
 <Answer>
 {answer}
 </Answer>
-
 Score:""").strip()
 
 
@@ -67,7 +73,9 @@ class ResponseModel(BaseModel):
     score: Literal["0", "1"]
 
 
-def get_outlines_model(model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4):
+def get_outlines_model(
+    model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4
+):
     if quantization_bits == 4:
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
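
The hunk above reflows the get_outlines_model signature right where the 4-bit path begins. For context, a minimal sketch of the standard transformers 4-bit setup: only load_in_4bit=True is visible in this diff, so the remaining fields and the base-model id are illustrative defaults, not the Space's actual values.

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Illustrative 4-bit NF4 config; only load_in_4bit=True appears in the hunk
# above, the other fields are common defaults shown for context.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Hypothetical base checkpoint id; the app resolves the real one from the
# adapter's PeftConfig before loading.
base_model = AutoModelForCausalLM.from_pretrained(
    "base-model-id",
    device_map="auto",
    quantization_config=quantization_config,
)
```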
@@ -94,7 +102,9 @@ def get_outlines_model(model_id: str, device_map: str = "auto", quantization_bit
         quantization_config=quantization_config,
     )
     hf_model = PeftModel.from_pretrained(base_model, model_id)
-    hf_tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True, clean_up_tokenization_spaces=True)
+    hf_tokenizer = AutoTokenizer.from_pretrained(
+        base_model_id, use_fast=True, clean_up_tokenization_spaces=True
+    )
 
     model = outlines.from_transformers(hf_model, hf_tokenizer)
     return model
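
With the tokenizer loaded, the returned Outlines model is meant to be wrapped in a Generator constrained by ResponseModel. A rough sketch of that round trip, assuming the module-level helpers (format_prompt, DEFAULT_MODEL_ID) and the app's own result.score access pattern; the example strings are invented.

```python
from outlines import Generator

# Constrain decoding to the ResponseModel schema, i.e. {"score": "0" | "1"}.
model = get_outlines_model(DEFAULT_MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
generator = Generator(model, ResponseModel)

# Invented inputs, passed positionally like the app's own calls.
prompt = format_prompt(
    "Anna puts her keys in the drawer and leaves the room.",  # story
    "Where will Anna look for her keys?",                     # question
    "1 if the answer mentions the drawer, else 0.",           # grading scheme
    "She will look in the drawer.",                           # participant answer
)

# Depending on the outlines version the result may be a parsed object or a
# JSON string needing ResponseModel.model_validate_json; this mirrors the
# app's direct result.score usage.
result = generator(prompt)
print(result.score)  # "0" or "1", per Literal["0", "1"]
```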
@@ -129,11 +139,16 @@ def label_single_response_with_model(model_id, story, question, criteria, respon
     result = generator(prompt)
     return result.score
 
+
 @spaces.GPU
-def label_multi_responses_with_model(model_id, story, question, criteria, response_file):
+def label_multi_responses_with_model(
+    model_id, story, question, criteria, response_file
+):
     df = pd.read_csv(response_file.name)
     assert "response" in df.columns, "CSV must contain a 'response' column."
-    prompts = [format_prompt(story, question, criteria, resp) for resp in df["response"]]
+    prompts = [
+        format_prompt(story, question, criteria, resp) for resp in df["response"]
+    ]
 
     if "longformer" in model_id:
         model, tokenizer = get_outlines_model(model_id, DEVICE_MAP, QUANTIZATION_BITS)
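
The batch endpoint above only asserts that the uploaded CSV has a "response" column. A minimal sketch of producing such a file with pandas; the rows are invented for illustration.

```python
import pandas as pd

# Build a minimal responses.csv for the Batch (CSV) tab; only the
# "response" column is checked, extra columns would pass through.
pd.DataFrame(
    {
        "response": [
            "She will look in the drawer.",
            "He thinks the game was cancelled.",
        ]
    }
).to_csv("responses.csv", index=False)
```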
@@ -168,38 +183,49 @@ def single_response_ui(model_id):
         live=False,
     )
 
+
 def multi_response_ui(model_id):
     return gr.Interface(
-        fn=lambda story, question, criteria, response_file: label_multi_responses_with_model(
+        fn=lambda story,
+        question,
+        criteria,
+        response_file: label_multi_responses_with_model(
            model_id.value, story, question, criteria, response_file
        ),
        inputs=[
            gr.Textbox(label="Story", lines=6),
            gr.Textbox(label="Question", lines=2),
            gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
-           gr.File(label="Responses CSV (.csv with 'response' column)", file_types=[".csv"]),
+           gr.File(
+               label="Responses CSV (.csv with 'response' column)", file_types=[".csv"]
+           ),
        ],
        outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
        live=False,
    )
 
+
 with gr.Blocks(title="Zero-Shot Evaluation Grader") as iface:
     model_selector = gr.Dropdown(
         label="Select Model",
-        choices=
-        value=
+        choices=AVAILABLE_MODELS,
+        value=AVAILABLE_MODELS[0],
     )
     selected_model_id = gr.State(value=DEFAULT_MODEL_ID)
 
     def update_model_id(choice):
-        return
+        return choice
+
+    model_selector.change(
+        fn=update_model_id, inputs=model_selector, outputs=selected_model_id
+    )
 
-
+    with gr.Tabs():
+        with gr.Tab("Single Response"):
+            single_response_ui(selected_model_id)
+        with gr.Tab("Batch (CSV)"):
+            multi_response_ui(selected_model_id)
 
-    gr.TabbedInterface(
-        [single_response_ui(selected_model_id), multi_response_ui(selected_model_id)],
-        ["Single Response", "Batch (CSV)"],
-    ).render()
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch(share=True)