orgoflu committed on
Commit
317a31a
·
verified ·
1 Parent(s): 2e6f84b
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import json
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
+ import torch
5
+
6
# Candidate models to benchmark. (Original comment was mojibake-encoded
# Korean; it appears to say these are public, non-gated models — TODO confirm.)
MODELS = [
    "naver-clova/HyperCLOVA-X-Seed-3B",
    "NousResearch/Hermes-3-Llama-3.2-3B",
    "tiiuae/Falcon-7B-Instruct",
    "openchat/openchat-3.5-0106",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "Qwen/Qwen2.5-7B-Instruct",
    "deepseek-ai/deepseek-coder-6.7b-instruct",
    "microsoft/Phi-3-mini-4k-instruct",
]

# Hugging Face access token; only needed when benchmarking gated repos.
# Set to a string of the form "hf_xxx" to authenticate.
HF_TOKEN = None

# Benchmark prompt suite. Each entry pairs a prompt with the keywords whose
# presence in the model output counts toward the quality score.
# NOTE(review): the prompt/keyword strings below are reproduced byte-for-byte
# from the original file, which appears to be mojibake-encoded Korean.
TESTS = [
    {"name": "์š”์•ฝ",
     "prompt": "๋‹ค์Œ ๋ฌธ์žฅ์„ ํ•œ ์ค„๋กœ ์š”์•ฝํ•ด ์ฃผ์„ธ์š”: ์ธ๊ณต์ง€๋Šฅ์€ ๋‹ค์–‘ํ•œ ์‚ฐ์—…์—์„œ ํ˜์‹ ์„ ์ด๋Œ๊ณ  ์žˆ๋‹ค.",
     "keywords": ["ํ˜์‹ ", "์‚ฐ์—…"]},
    {"name": "QA",
     "prompt": "๋‹ฌ์˜ ์ค‘๋ ฅ์€ ์ง€๊ตฌ์˜ ๋ช‡ ๋ถ„์˜ ๋ช‡์ธ๊ฐ€์š”?",
     "keywords": ["6๋ถ„์˜1", "1/6"]},
    {"name": "์ฝ”๋“œ",
     "prompt": "ํŒŒ์ด์ฌ์œผ๋กœ ํ”ผ๋ณด๋‚˜์น˜ ์ˆ˜์—ด์„ ์ถœ๋ ฅํ•˜๋Š” ํ•จ์ˆ˜๋ฅผ ์ž‘์„ฑํ•ด ์ฃผ์„ธ์š”.",
     "keywords": ["def", "fibonacci"]},
]
39
+
40
def benchmark_model(model_id):
    """Load ``model_id``, run every prompt in TESTS, and return timing/quality stats.

    Returns a dict with ``model``, ``load_time_sec``, ``total_time_sec``,
    ``total_quality_score`` and per-test entries (``tests``).  On any failure
    (gated repo, OOM, download error, ...) returns
    ``{"model": model_id, "error": str(e)}`` so the caller can continue with
    the next model.
    """
    model = tokenizer = pipe = None
    try:
        start_time = time.time()
        tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            # fp16 halves GPU memory; fall back to fp32 on CPU where half
            # precision is slow or unsupported.
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto",
            token=HF_TOKEN
        )
        # BUGFIX: do not pass `device=` here. The model was already placed by
        # accelerate via device_map="auto", and transformers raises a
        # ValueError when a pipeline tries to move an accelerate-dispatched
        # model to a specific device.
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

        elapsed_load = round(time.time() - start_time, 2)
        test_results = []
        total_quality = 0

        for test in TESTS:
            t_start = time.time()
            output = pipe(test["prompt"], max_new_tokens=100, do_sample=False)[0]['generated_text']
            t_elapsed = round(time.time() - t_start, 2)
            # BUGFIX: score only the model's continuation. `generated_text`
            # echoes the prompt, so matching on the full text credits keywords
            # that already appear in the prompt itself (e.g. the summary
            # test's keywords), making that score maximal for every model.
            if output.startswith(test["prompt"]):
                continuation = output[len(test["prompt"]):]
            else:
                continuation = output
            quality_score = sum(1 for kw in test["keywords"] if kw in continuation)
            total_quality += quality_score
            test_results.append({
                "test_name": test["name"],
                "time_sec": t_elapsed,
                "quality_score": quality_score,
                "output": output.strip()
            })

        total_time = round(time.time() - start_time, 2)
        return {
            "model": model_id,
            "load_time_sec": elapsed_load,
            "total_time_sec": total_time,
            "total_quality_score": total_quality,
            "tests": test_results
        }
    except Exception as e:
        # Boundary handler: report the failure instead of aborting the whole
        # benchmark run; main() prints and records the error.
        return {"model": model_id, "error": str(e)}
    finally:
        # Release model memory before the caller loads the next model; the
        # list includes several ~7B checkpoints and holding two at once in
        # fp16 can OOM a single GPU.
        del model, tokenizer, pipe
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
79
+
80
def main():
    """Benchmark every model in MODELS, persist results to JSON, print a top-3 ranking.

    Results (including per-model error entries) are written to
    ``benchmark_results.json`` after every model so a mid-run crash keeps the
    partial results instead of losing everything.
    """
    import gc  # local import: only needed for the cleanup pass below

    results = []
    for m in MODELS:
        print(f"\n=== {m} ===")
        res = benchmark_model(m)
        results.append(res)
        if "error" in res:
            print(f"โŒ Error: {res['error']}")
        else:
            print(f"โฑ ๋กœ๋“œ {res['load_time_sec']}s | ์ด {res['total_time_sec']}s | ํ’ˆ์งˆํ•ฉ {res['total_quality_score']}")
            for t in res["tests"]:
                print(f" - {t['test_name']}: {t['time_sec']}s | ํ’ˆ์งˆ {t['quality_score']}")
                print(f" {t['output'][:80]}...")

        # Reclaim memory between models: several entries are ~7B checkpoints
        # and loading them back-to-back without a cleanup pass can OOM.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Persist after every model so partial results survive a crash.
        with open("benchmark_results.json", "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=2)

    valid_results = [r for r in results if "error" not in r]
    # Rank by quality score (descending), ties broken by total time (ascending).
    ranked = sorted(valid_results, key=lambda x: (-x["total_quality_score"], x["total_time_sec"]))

    print("\n๐Ÿ† ์ถ”์ฒœ ๋ชจ๋ธ TOP:")
    for r in ranked[:3]:
        print(f"{r['model']} | ํ’ˆ์งˆ {r['total_quality_score']} | ์ด {r['total_time_sec']}s")


if __name__ == "__main__":
    main()