Spaces:

ActiveYixiao
/

automatic_coding

Sleeping

App Files Files Community

Yixiao Wang (Computer Science) committed on Aug 7, 2025

Commit

cd15e92

1 Parent(s): 6ba2695

init app

Browse files

Files changed (2) hide show

app.py +171 -58
requirements.txt +11 -1

app.py CHANGED Viewed

@@ -1,64 +1,177 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
     ],
 )
 if __name__ == "__main__":
-    demo.launch()

+import logging
+import textwrap
+from typing import Literal, Optional
 import gradio as gr
+import outlines
+import pandas as pd
+import torch
+from outlines import Generator
+from peft import PeftConfig, PeftModel
+from pydantic import BaseModel, ConfigDict
+from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
+# Configure the root logger at INFO level and create this module's logger.
+# NOTE(review): `logger` is not used anywhere in the visible code.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Model identifier handed to the `from_pretrained` loaders; the code branches on
+# whether this string contains "longformer".
+MODEL_ID = "rshwndsz/ft-hermes-3-llama-3.2-3b"
+# Forwarded as `device_map` when the base causal LM is loaded.
+DEVICE_MAP = "auto"
+# 4 or 8 selects a BitsAndBytesConfig; any other value (None here) disables quantization.
+QUANTIZATION_BITS = None
+# NOTE(review): not referenced by any visible code — confirm whether it should be
+# passed to generation.
+TEMPERATURE = 0.0
+# Grader instructions; format_prompt puts this text ahead of every request.
+SYSTEM_PROMPT = textwrap.dedent("""
+You are an assistant tasked with grading answers to a mind reading ability test. You will be provided with the following information:
+1. A story that was presented to participants as context
+2. The question that participants were asked to answer
+3. A grading scheme to evaluate the answers (Correct Responses:1, incorrect response:0, Incomplete response:0, Irrelevant:0)
+4. Grading examples
+5. A participant answer
+Your task is to grade each answer according to the grading scheme. For each answer, you should:
+1. Carefully read and understand the answer and compare it to the grading criteria
+2. Assigning an score 1 or 0 for each answer.
+""").strip()
+# Per-request template filled by format_prompt with {story}, {question},
+# {grading_scheme} and {answer}; it ends with "Score:" so the model's output follows it.
+PROMPT_TEMPLATE = textwrap.dedent("""
+<Story>
+{story}
+</Story>
+<Question>
+{question}
+</Question>
+<GradingScheme>
+{grading_scheme}
+</GradingScheme>
+<Answer>
+{answer}
+</Answer>
+Score:""").strip()
+class ResponseModel(BaseModel):
+    """Schema for a grading result: a single `score` field holding "0" or "1"."""
+    # Reject any field other than the ones declared on this model.
+    model_config = ConfigDict(extra="forbid")
+    # The grade, kept as a string literal rather than an int.
+    score: Literal["0", "1"]
+# Already-loaded models keyed by (model_id, device_map, quantization_bits), so that
+# repeated requests reuse the weights instead of reloading them on every call.
+_MODEL_CACHE: dict = {}
+def get_outlines_model(model_id: str, device_map: str = "auto", quantization_bits: Optional[int] = 4):
+    """Load the grader for `model_id`, reusing a previously loaded instance when available.
+    Args:
+        model_id: Identifier passed to the `from_pretrained` loaders. If it contains
+            "longformer" a sequence-classification model is loaded; otherwise it is
+            treated as a PEFT adapter on top of a causal LM.
+        device_map: Forwarded to the base causal LM loader (unused for longformer).
+        quantization_bits: 4 or 8 enables bitsandbytes quantization of the base
+            causal LM; any other value loads it unquantized.
+    Returns:
+        `(hf_model, hf_tokenizer)` for longformer ids, otherwise an outlines model
+        wrapping the PEFT model and the base model's tokenizer.
+    """
+    cache_key = (model_id, device_map, quantization_bits)
+    if cache_key in _MODEL_CACHE:
+        return _MODEL_CACHE[cache_key]
+    if "longformer" in model_id:
+        hf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
+        hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
+        loaded = (hf_model, hf_tokenizer)
+    else:
+        # Only the causal-LM path consumes a quantization config, so build it here.
+        if quantization_bits == 4:
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_compute_dtype=torch.bfloat16,
+            )
+        elif quantization_bits == 8:
+            quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+        else:
+            quantization_config = None
+        # The adapter config names the base checkpoint it was trained on.
+        peft_config = PeftConfig.from_pretrained(model_id)
+        base_model_id = peft_config.base_model_name_or_path
+        base_model = AutoModelForCausalLM.from_pretrained(
+            base_model_id,
+            device_map=device_map,
+            quantization_config=quantization_config,
+        )
+        hf_model = PeftModel.from_pretrained(base_model, model_id)
+        hf_tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True, clean_up_tokenization_spaces=True)
+        loaded = outlines.from_transformers(hf_model, hf_tokenizer)
+    _MODEL_CACHE[cache_key] = loaded
+    return loaded
+def format_prompt(story: str, question: str, grading_scheme: str, answer: str) -> str:
+    """Return SYSTEM_PROMPT, a blank line, then PROMPT_TEMPLATE filled with the stripped inputs."""
+    fields = {
+        "story": story,
+        "question": question,
+        "grading_scheme": grading_scheme,
+        "answer": answer,
+    }
+    # Surrounding whitespace is trimmed from every field before substitution.
+    body = PROMPT_TEMPLATE.format(**{name: text.strip() for name, text in fields.items()})
+    return f"{SYSTEM_PROMPT}\n\n{body}"
+def label_single_response(story, question, criteria, response):
+    """Grade one participant answer and return its score as the string "0" or "1".
+    Args:
+        story: Story text shown to participants.
+        question: Question the participant answered.
+        criteria: Grading scheme text.
+        response: The participant's answer.
+    Returns:
+        For longformer models, the argmax class index as a string; otherwise the
+        `score` field of the generated ResponseModel JSON.
+    """
+    prompt = format_prompt(story, question, criteria, response)
+    if "longformer" in MODEL_ID:
+        model, tokenizer = get_outlines_model(MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)
+        with torch.no_grad():
+            logits = model(**inputs).logits
+        return str(torch.argmax(logits, dim=1).item())
+    model = get_outlines_model(MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
+    # Constrain decoding to the ResponseModel schema; without an output type the
+    # generator returns free text, which has no `.score` attribute.
+    generator = Generator(model, ResponseModel)
+    with torch.no_grad():
+        # The constrained output is a short JSON object, so cap new tokens explicitly
+        # instead of relying on the model's default generation length.
+        raw = generator(prompt, max_new_tokens=32)
+    # The generator yields a JSON string; validate it to read the score.
+    return ResponseModel.model_validate_json(raw).score
+def label_multi_responses(story, question, criteria, response_file):
+    """Grade every row of an uploaded CSV and return it with an added `score` column.
+    Args:
+        story: Story text shown to participants.
+        question: Question the participants answered.
+        criteria: Grading scheme text.
+        response_file: Uploaded file (object with a `.name` path, or a path string)
+            pointing at a CSV that has a `response` column.
+    Returns:
+        The CSV as a DataFrame with a `score` column of "0"/"1" strings.
+    Raises:
+        gr.Error: If no file was uploaded or the `response` column is missing.
+    """
+    if response_file is None:
+        raise gr.Error("Please upload a CSV file with a 'response' column.")
+    df = pd.read_csv(getattr(response_file, "name", response_file))
+    # Validate with a real exception: `assert` is stripped under `python -O`.
+    if "response" not in df.columns:
+        raise gr.Error("CSV must contain a 'response' column.")
+    # Empty cells are read as NaN (a float); grade them as empty answers instead of
+    # failing on `.strip()`.
+    answers = df["response"].fillna("").astype(str)
+    prompts = [format_prompt(story, question, criteria, text) for text in answers]
+    if not prompts:
+        # Nothing to grade; return the (empty) frame with the expected column.
+        df["score"] = []
+        return df
+    if "longformer" in MODEL_ID:
+        model, tokenizer = get_outlines_model(MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
+        inputs = tokenizer(prompts, return_tensors="pt", truncation=True, padding=True)
+        with torch.no_grad():
+            logits = model(**inputs).logits
+        scores = [str(cls) for cls in torch.argmax(logits, dim=1).tolist()]
+    else:
+        model = get_outlines_model(MODEL_ID, DEVICE_MAP, QUANTIZATION_BITS)
+        # Constrain decoding to the ResponseModel schema so each result is parseable JSON.
+        generator = Generator(model, ResponseModel)
+        with torch.no_grad():
+            # Grade one prompt at a time: this uses the single-prompt call form and
+            # keeps memory use independent of the number of CSV rows.
+            raw_results = [generator(prompt, max_new_tokens=32) for prompt in prompts]
+        scores = [ResponseModel.model_validate_json(raw).score for raw in raw_results]
+    df["score"] = scores
+    return df
+# Tab 1: four text inputs (story, question, criteria, answer) graded by
+# label_single_response; the returned score is shown in a textbox.
+single_tab = gr.Interface(
+    fn=label_single_response,
+    inputs=[
+        gr.Textbox(label="Story", lines=6),
+        gr.Textbox(label="Question", lines=2),
+        gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
+        gr.Textbox(label="Single Response", lines=3),
     ],
+    outputs=gr.Textbox(label="Score"),
 )
+# Tab 2: same story/question/criteria inputs plus a CSV upload (restricted to .csv);
+# label_multi_responses returns a DataFrame that is rendered as a table.
+multi_tab = gr.Interface(
+    fn=label_multi_responses,
+    inputs=[
+        gr.Textbox(label="Story", lines=6),
+        gr.Textbox(label="Question", lines=2),
+        gr.Textbox(label="Criteria (Grading Scheme)", lines=4),
+        gr.File(label="Responses CSV (.csv with 'response' column)", file_types=[".csv"]),
+    ],
+    outputs=gr.Dataframe(label="Labeled Responses", type="pandas"),
+)
+# Combine both interfaces into one app; the second list gives the tab titles in order.
+iface = gr.TabbedInterface(
+    [single_tab, multi_tab],
+    ["Single Response", "Batch (CSV)"],
+    title="Zero-Shot Evaluation Grader",
+)
+# Start the Gradio server only when the file is run as a script.
 if __name__ == "__main__":
+    iface.launch()

requirements.txt CHANGED Viewed

	@@ -1 +1,11 @@
1	- huggingface_hub==0.25.2

+huggingface_hub==0.25.2
+transformers
+gradio
+peft
+outlines
+bitsandbytes
+accelerate
+torch
+pandas
+pydantic
+numpy