Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,13 +13,13 @@ import warnings
|
|
| 13 |
warnings.filterwarnings("ignore")
|
| 14 |
|
| 15 |
# ==================== CONFIGURATION (Final) ====================
|
| 16 |
-
# 🔴 1. اسم النموذج الأساسي
|
| 17 |
BASE_MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
|
| 18 |
# 🔴 2. مسار مُحولات التدريب (استبدله بالمعرف الصحيح للمستودع الخاص بك)
|
| 19 |
ADAPTER_REPO_ID = "YOUR_HF_USERNAME/Llama-4-Turkmen-Adapter"
|
| 20 |
ADAPTER_FOLDER = "Final_Adapter"
|
| 21 |
|
| 22 |
-
# إعدادات الذاكرة والتوكن
|
| 23 |
HF_TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN", None)
|
| 24 |
BNB_CONFIG = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
|
| 25 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
@@ -29,12 +29,11 @@ LANG_MAP = {"English": "en", "Turkish": "tr"}
|
|
| 29 |
# 1. دالة تحميل النموذج (تتم مرة واحدة عند بدء التطبيق)
|
| 30 |
def load_model():
|
| 31 |
print("=====================================================")
|
| 32 |
-
print("🚀 PHASE 1: Loading Base Model (
|
| 33 |
print("=====================================================")
|
| 34 |
|
| 35 |
if HF_TOKEN is None:
|
| 36 |
-
print("
|
| 37 |
-
sys.exit(1)
|
| 38 |
|
| 39 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 40 |
BASE_MODEL_ID,
|
|
@@ -68,8 +67,10 @@ def calculate_similarity(text1, text2):
|
|
| 68 |
def generate_and_verify(topic, lang, tokenizer, model):
|
| 69 |
lang_code = LANG_MAP[lang]
|
| 70 |
|
| 71 |
-
#
|
| 72 |
prompt = f"<|start_header_id|>user<|end_header_id|>\n\nWrite a factual summary about {topic}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
|
|
|
|
|
|
|
| 73 |
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(DEVICE)
|
| 74 |
outputs = model.generate(
|
| 75 |
input_ids, max_new_tokens=150, do_sample=True, temperature=0.6, pad_token_id=tokenizer.eos_token_id
|
|
@@ -79,7 +80,7 @@ def generate_and_verify(topic, lang, tokenizer, model):
|
|
| 79 |
try: ai_body = full_text.split("assistant\n\n")[-1].strip()
|
| 80 |
except: ai_body = full_text
|
| 81 |
|
| 82 |
-
#
|
| 83 |
wiki_live = wikipediaapi.Wikipedia(user_agent='HFConsoleVerifier', language=lang_code, extract_format=wikipediaapi.ExtractFormat.WIKI)
|
| 84 |
page = wiki_live.page(topic)
|
| 85 |
|
|
@@ -96,7 +97,7 @@ def generate_and_verify(topic, lang, tokenizer, model):
|
|
| 96 |
output_report.append(f"AI Response: {ai_body}")
|
| 97 |
else:
|
| 98 |
output_report.append(f"STATUS: 🔴 REJECTED (Low Match, Corrected)")
|
| 99 |
-
output_report.append(f"AI Hallucination: {ai_body[:
|
| 100 |
output_report.append(f"CORRECT SOURCE: {real_summary[:200]}...")
|
| 101 |
else:
|
| 102 |
output_report.append(f"STATUS: ❌ WIKI SOURCE ERROR (Unverified)")
|
|
@@ -110,15 +111,15 @@ def generate_and_verify(topic, lang, tokenizer, model):
|
|
| 110 |
|
| 111 |
if __name__ == "__main__":
|
| 112 |
|
| 113 |
-
# تحميل المو
|
| 114 |
TOKENIZER, MODEL = load_model()
|
| 115 |
|
| 116 |
-
# قائمة الاختبارات للتشغيل التلقائي
|
| 117 |
tests_to_run = [
|
| 118 |
("Kirkuk", "English"),
|
| 119 |
-
("
|
| 120 |
-
("
|
| 121 |
-
("
|
| 122 |
]
|
| 123 |
|
| 124 |
print("\n\n######################################################################")
|
|
|
|
| 13 |
warnings.filterwarnings("ignore")
|
| 14 |
|
| 15 |
# ==================== CONFIGURATION (Final) ====================
|
| 16 |
+
# 🔴 1. اسم النموذج الأساسي: هذا النموذج ضخم ويتم تحميله بضغط 4-بت.
|
| 17 |
BASE_MODEL_ID = "meta-llama/Llama-4-Maverick-17B-128E-Instruct"
|
| 18 |
# 🔴 2. مسار مُحولات التدريب (استبدله بالمعرف الصحيح للمستودع الخاص بك)
|
| 19 |
ADAPTER_REPO_ID = "YOUR_HF_USERNAME/Llama-4-Turkmen-Adapter"
|
| 20 |
ADAPTER_FOLDER = "Final_Adapter"
|
| 21 |
|
| 22 |
+
# إعدادات الذاكرة والتوكن (يتم قراءة التوكن من الأسرار)
|
| 23 |
HF_TOKEN = os.environ.get("HUGGING_FACE_HUB_TOKEN", None)
|
| 24 |
BNB_CONFIG = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16)
|
| 25 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 29 |
# 1. دالة تحميل النموذج (تتم مرة واحدة عند بدء التطبيق)
|
| 30 |
def load_model():
|
| 31 |
print("=====================================================")
|
| 32 |
+
print(f"🚀 PHASE 1: Loading Base Model ({BASE_MODEL_ID})")
|
| 33 |
print("=====================================================")
|
| 34 |
|
| 35 |
if HF_TOKEN is None:
|
| 36 |
+
print("⚠️ WARNING: HUGGING_FACE_HUB_TOKEN not found. Access to Llama models may fail.")
|
|
|
|
| 37 |
|
| 38 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 39 |
BASE_MODEL_ID,
|
|
|
|
| 67 |
def generate_and_verify(topic, lang, tokenizer, model):
|
| 68 |
lang_code = LANG_MAP[lang]
|
| 69 |
|
| 70 |
+
# تنسيق السؤال (Llama Chat Template)
|
| 71 |
prompt = f"<|start_header_id|>user<|end_header_id|>\n\nWrite a factual summary about {topic}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
|
| 72 |
+
|
| 73 |
+
# التوليد
|
| 74 |
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(DEVICE)
|
| 75 |
outputs = model.generate(
|
| 76 |
input_ids, max_new_tokens=150, do_sample=True, temperature=0.6, pad_token_id=tokenizer.eos_token_id
|
|
|
|
| 80 |
try: ai_body = full_text.split("assistant\n\n")[-1].strip()
|
| 81 |
except: ai_body = full_text
|
| 82 |
|
| 83 |
+
# 3. التحقق المباشر من ويكيبيديا
|
| 84 |
wiki_live = wikipediaapi.Wikipedia(user_agent='HFConsoleVerifier', language=lang_code, extract_format=wikipediaapi.ExtractFormat.WIKI)
|
| 85 |
page = wiki_live.page(topic)
|
| 86 |
|
|
|
|
| 97 |
output_report.append(f"AI Response: {ai_body}")
|
| 98 |
else:
|
| 99 |
output_report.append(f"STATUS: 🔴 REJECTED (Low Match, Corrected)")
|
| 100 |
+
output_report.append(f"AI Hallucination: {ai_body[:100]}...")
|
| 101 |
output_report.append(f"CORRECT SOURCE: {real_summary[:200]}...")
|
| 102 |
else:
|
| 103 |
output_report.append(f"STATUS: ❌ WIKI SOURCE ERROR (Unverified)")
|
|
|
|
| 111 |
|
| 112 |
if __name__ == "__main__":
|
| 113 |
|
| 114 |
+
# 1. تحميل النموذج
|
| 115 |
TOKENIZER, MODEL = load_model()
|
| 116 |
|
| 117 |
+
# 2. قائمة الاختبارات للتشغيل التلقائي
|
| 118 |
tests_to_run = [
|
| 119 |
("Kirkuk", "English"),
|
| 120 |
+
("Altınköprü Katliamı", "Turkish"),
|
| 121 |
+
("Gokturks", "English"),
|
| 122 |
+
("Kerkük", "Turkish")
|
| 123 |
]
|
| 124 |
|
| 125 |
print("\n\n######################################################################")
|