Spaces:

teameight
/

fairytale_generator

Runtime error

App Files Files Community

Antonio0616 committed on Jul 25, 2025

Commit

e6b34ce

1 Parent(s): ff4ce05

mistralai 사용

Browse files

Files changed (2) hide show

model_comparion.py +88 -0
model_comparion2.py +79 -0

model_comparion.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# ✅ High-performance model configuration
+# Checkpoint id handed to both from_pretrained() calls below.
+MODEL_NAME = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+# Prefer CUDA when torch reports it as available, otherwise fall back to CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# ✅ Load the tokenizer and the model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
+# Weights are requested in float16 with device_map="auto".
+# NOTE(review): with device_map="auto" the loader decides where the weights
+# live, which may not be the same place as `device` above - confirm before
+# moving input tensors to `device`.
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch.float16,
+    device_map="auto",
+)
+# ✅ Prompt builder
+def build_comparison_prompt(story_a, story_b):
+    """Return the Mixtral instruction prompt that compares two stories.
+
+    The prompt is wrapped in ``<s>[INST] ... [/INST]`` and asks for 1-5
+    scores on coherence, creativity and engagement for each story, a short
+    comment per story, and an overall winner.
+
+    Args:
+        story_a: Text of the first story; interpolated under "Story A:".
+        story_b: Text of the second story; interpolated under "Story B:".
+
+    Returns:
+        The full prompt as a single newline-terminated string.
+    """
+    # The per-story answer template is identical for A and B.
+    score_sheet = [
+        "- Coherence: ?/5",
+        "- Creativity: ?/5",
+        "- Engagement: ?/5",
+        "- Comment: ...",
+    ]
+    prompt_lines = [
+        "<s>[INST] You are a children's story evaluation expert.",
+        "Compare the following two stories on:",
+        "1. Coherence (structure and flow)",
+        "2. Creativity (imagination and originality)",
+        "3. Engagement (fun and emotional draw)",
+        "Evaluate both stories and score each criterion from 1 to 5.",
+        "Then, provide a brief comment and declare the overall better story.",
+        "Story A:",
+        f"{story_a}",
+        "Story B:",
+        f"{story_b}",
+        "Respond in this format:",
+        "Story A:",
+        *score_sheet,
+        "Story B:",
+        *score_sheet,
+        "🟢 Overall Winner: Story A or Story B",
+        "Comment: ... [/INST]",
+        "",  # trailing empty item keeps the final newline of the template
+    ]
+    return "\n".join(prompt_lines)
+# ✅ Evaluation function
+def evaluate_stories_with_mixtral(story_a, story_b):
+    """Ask the Mixtral model to score and compare two stories.
+
+    Args:
+        story_a: Text of the first story.
+        story_b: Text of the second story.
+
+    Returns:
+        The model's evaluation text with the prompt removed, or a Korean
+        failure message when the reply is shorter than 30 characters or does
+        not mention "Story A".
+    """
+    prompt = build_comparison_prompt(story_a, story_b)
+    # The prompt already starts with a literal "<s>", so stop the tokenizer
+    # from prepending a second BOS token. The model was loaded with
+    # device_map="auto", so send the inputs to wherever the model actually is.
+    inputs = tokenizer(
+        prompt,
+        return_tensors="pt",
+        return_token_type_ids=False,
+        add_special_tokens=False,
+    ).to(model.device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=1024,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+    # Decode only the newly generated tokens. Stripping the prompt with
+    # str.replace() is unreliable because decoding drops special tokens such
+    # as "<s>", so the decoded text never matches the prompt exactly; the
+    # leftover prompt also made the "Story A" check below always succeed.
+    prompt_length = inputs["input_ids"].shape[-1]
+    result = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
+    if len(result) < 30 or "Story A" not in result:
+        return "❌ 평가 실패: 출력이 부족하거나 형식이 맞지 않습니다. 다시 시도해주세요."
+    return result
+# ✅ Gradio UI
+# Layout: a heading, two story inputs in one row, an output box and a button.
+with gr.Blocks() as demo:
+    gr.Markdown("## 🧙‍♂️ 동화 비교 평가기 (Mixtral 8x7B)\n긴 동화도 안정적으로 비교합니다!")
+    with gr.Row():
+        story_a = gr.Textbox(label="📘 Story A", lines=20, placeholder="전체 동화 A 입력")
+        story_b = gr.Textbox(label="📗 Story B", lines=20, placeholder="전체 동화 B 입력")
+    result = gr.Textbox(label="📊 평가 결과", lines=35)
+    compare = gr.Button("🧠 평가하기")
+    # Clicking the button sends both stories to the evaluator and shows its
+    # return value in the result box.
+    compare.click(fn=evaluate_stories_with_mixtral, inputs=[story_a, story_b], outputs=result)
+# Start the app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch(share=True)

model_comparion2.py ADDED Viewed

	@@ -0,0 +1,79 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# ✅ High-performance model selection
+# Checkpoint id handed to both from_pretrained() calls below.
+MODEL_NAME = "google/flan-t5-large"
+# ✅ Device selection
+# Prefer CUDA when torch reports it as available, otherwise fall back to CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# ✅ Load the model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+# The whole model is moved onto `device` right after loading.
+model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
+# ✅ Prompt builder
+def build_prompt(story_a, story_b):
+    """Return the Flan-T5 prompt that compares two children's stories.
+
+    The prompt asks for 1-5 scores on coherence, creativity and engagement
+    for each story and an overall winner, and spells out the answer format.
+
+    Args:
+        story_a: Text of the first story; placed after "Story A: ".
+        story_b: Text of the second story; placed after "Story B: ".
+
+    Returns:
+        The prompt as a single string that starts and ends with a newline.
+    """
+    # The per-story answer template is identical for A and B.
+    score_sheet = [
+        "- Coherence: ?/5",
+        "- Creativity: ?/5",
+        "- Engagement: ?/5",
+        "- Comment: ...",
+    ]
+    prompt_lines = [
+        "",  # leading empty item reproduces the template's opening newline
+        "You are a story evaluation assistant. Please compare the two children's stories below using the following criteria:",
+        "1. Coherence (structure and flow)",
+        "2. Creativity (imagination and originality)",
+        "3. Engagement (fun and emotional draw)",
+        "Give each story a score from 1 to 5 on each criterion. Then summarize which story is better overall.",
+        f"Story A: {story_a}",
+        f"Story B: {story_b}",
+        "Respond in this format:",
+        "Story A:",
+        *score_sheet,
+        "Story B:",
+        *score_sheet,
+        "🟢 Overall Winner: Story A or Story B",
+        "",  # trailing empty item keeps the template's final newline
+    ]
+    return "\n".join(prompt_lines)
+# ✅ Evaluation function
+def evaluate_stories(story_a, story_b):
+    """Ask the Flan-T5 model to score and compare two stories.
+
+    Args:
+        story_a: Text of the first story.
+        story_b: Text of the second story.
+
+    Returns:
+        The decoded model reply with surrounding whitespace removed, or a
+        Korean failure message when the reply is shorter than 20 characters.
+    """
+    prompt = build_prompt(story_a, story_b)
+    # The prompt is cut to at most 1024 tokens, so for very long stories the
+    # tail of the prompt (including the answer format) can be dropped.
+    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)
+    # Plain beam search. temperature/top_k/top_p only take effect together
+    # with do_sample=True, which was never set, so they were ignored and only
+    # produced warnings; dropping them keeps the output the same.
+    outputs = model.generate(
+        **inputs,
+        max_length=512,
+        num_beams=4,
+    )
+    result = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
+    if len(result) < 20:
+        return "❌ 평가 실패: 응답이 너무 짧습니다. 스토리를 간단히 줄이거나 모델 성능을 높여보세요."
+    return result
+# ✅ Gradio interface
+# Layout: a heading, two story inputs in one row, an output box and a button.
+with gr.Blocks() as demo:
+    gr.Markdown("## 🧙‍♂️ 동화 비교 평가기 (Flan-T5 Large)\n두 개의 동화를 비교해 평가합니다!")
+    with gr.Row():
+        story_a = gr.Textbox(label="📘 Story A", lines=12)
+        story_b = gr.Textbox(label="📗 Story B", lines=12)
+    result = gr.Textbox(label="📊 평가 결과", lines=20)
+    compare = gr.Button("🧠 평가하기")
+    # Clicking the button sends both stories to the evaluator and shows its
+    # return value in the result box.
+    compare.click(fn=evaluate_stories, inputs=[story_a, story_b], outputs=result)
+# Start the app only when this file is run as a script.
+if __name__ == "__main__":
+    demo.launch(share=True)