Spaces:

anaspro
/

chatbox

Runtime error

anaspro

updatE

b7cdae4 6 months ago

6.88 kB

	# -*- coding: utf-8 -*-

	import os
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import gradio as gr
	import spaces

	# Identifier handed to from_pretrained() for both the tokenizer and the model.
	model_path = "anaspro/iraqi-7b"

	# Iraqi Arabic chat prompt
	# {Question} is replaced with the conversation text through str.format().
	# NOTE(review): "\|" is not a Python escape sequence, so the backslashes stay
	# in the string literally. Confirm the model's turn markers really contain
	# backslashes; they look like markdown-escaping residue.
	prompt_ar = """### Instruction:أنت ذكاء صناعي يتحدث باللهجة العراقية ويجيب باحترافية وذكاء. الإجابات دائما تستخدم اللهجة العراقية.
	### Input:[\|Human\|] {Question}
	[\|AI\|]
	### Response :"""

	# Optional access token, taken from the HF_TOKEN environment variable if set.
	hf_token = os.getenv("HF_TOKEN")

	# Use the GPU when torch reports one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Load the model with optimizations for ZeroGPU
	# (the two print calls announce the start and the end of loading).
	print("جاري تحميل المودل...")
	# NOTE(review): trust_remote_code=True allows code shipped in the model
	# repository to run while loading; confirm the repository is trusted.
	tokenizer = AutoTokenizer.from_pretrained(
	model_path,
	token=hf_token,
	trust_remote_code=True
	)

	# Half precision is requested on GPU and full precision on CPU.
	model = AutoModelForCausalLM.from_pretrained(
	model_path,
	device_map="auto",
	trust_remote_code=True,
	token=hf_token,
	torch_dtype=torch.float16 if device == "cuda" else torch.float32,
	low_cpu_mem_usage=True
	)
	print("تم تحميل المودل بنجاح!")

	# Fall back to the end-of-sequence token when the tokenizer defines no
	# padding token, so that a usable pad_token_id can be passed to generate().
	if tokenizer.pad_token is None:
	    tokenizer.pad_token = tokenizer.eos_token

	def get_response(text, tokenizer=tokenizer, model=model):
	    """Generate a single assistant reply for an already formatted prompt.

	    Same function as in the model documentation, adapted for the chat model.

	    Args:
	        text: Complete prompt string (normally built from ``prompt_ar``).
	        tokenizer: Tokenizer used to encode the prompt and decode the output.
	        model: Causal language model whose ``generate`` method is called.

	    Returns:
	        The text of the first assistant turn, with turn markers removed and
	        surrounding whitespace stripped. May be an empty string.
	    """
	    encoded = tokenizer(text, return_tensors="pt")
	    input_ids = encoded["input_ids"].to(device)
	    attention_mask = encoded["attention_mask"].to(device)
	    input_len = input_ids.shape[-1]

	    generate_ids = model.generate(
	        input_ids,
	        attention_mask=attention_mask,
	        top_p=0.8,
	        temperature=0.2,
	        # Limit response length to prevent multiple responses
	        max_length=input_len + 256,
	        min_length=input_len + 4,
	        repetition_penalty=1.3,
	        do_sample=True,
	        pad_token_id=tokenizer.pad_token_id,
	        # Stop at the end-of-sequence token
	        eos_token_id=tokenizer.eos_token_id,
	    )

	    # Decode only the tokens produced after the prompt. Splitting the full
	    # text on "### Response :" and keeping the last piece would return the
	    # wrong section whenever the model repeats that header in its output.
	    response = tokenizer.batch_decode(
	        generate_ids[:, input_len:],
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=True,
	    )[0]

	    # Keep the first assistant turn only: drop everything from the first
	    # invented human turn onwards, whether or not an AI marker is present.
	    response = response.split("[\|Human\|]")[0]

	    # Remove any leftover AI turn markers from the reply itself.
	    response = response.replace("[\|AI\|]", "")

	    return response.strip()

	def format_conversation_history(chat_history):
	    """Reduce chat history entries to plain ``role``/``content`` dictionaries.

	    Args:
	        chat_history: Iterable of mappings that each have a ``"role"`` and a
	            ``"content"`` key. ``content`` may be any value; a list is
	            collapsed as described below.

	    Returns:
	        A new list of ``{"role": ..., "content": ...}`` dictionaries, in the
	        same order. When ``content`` is a list, it is replaced by the
	        ``"text"`` value of its first element if that element is a mapping
	        with a ``"text"`` key, and by ``str(content)`` otherwise.
	    """
	    messages = []
	    for item in chat_history:
	        role = item["role"]
	        content = item["content"]
	        if isinstance(content, list):
	            first_part = content[0] if content else None
	            # Only index by "text" when the first part is a mapping; on a
	            # plain string `"text" in part` is a substring test and the
	            # lookup that follows would raise TypeError.
	            if isinstance(first_part, dict) and "text" in first_part:
	                content = first_part["text"]
	            else:
	                content = str(content)
	        messages.append({"role": role, "content": content})
	    return messages

	def detect_language(text):
	    """Simple language detection - Arabic vs English.

	    Counts the characters in the Unicode range U+0600 to U+06FF and compares
	    them with the number of non-whitespace characters.

	    Args:
	        text: The string to classify.

	    Returns:
	        ``'ar'`` when more than 30% of the non-whitespace characters fall in
	        that range, or when the text has no non-whitespace characters at
	        all; ``'en'`` otherwise.
	    """
	    # Ignore every kind of whitespace (not only the space character) so that
	    # tabs and newlines neither dilute the ratio nor defeat the blank check.
	    visible_chars = [char for char in text if not char.isspace()]
	    if not visible_chars:
	        return 'ar'  # default to Arabic

	    arabic_chars = sum(1 for char in visible_chars if '\u0600' <= char <= '\u06FF')
	    arabic_ratio = arabic_chars / len(visible_chars)
	    return 'ar' if arabic_ratio > 0.3 else 'en'

	def _sample_reply(prompt, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
	    """Sample one reply for ``prompt`` with the given settings and strip turn markers."""
	    encoded = tokenizer(prompt, return_tensors="pt")
	    input_ids = encoded["input_ids"].to(device)
	    attention_mask = encoded["attention_mask"].to(device)
	    prompt_len = input_ids.shape[-1]

	    output_ids = model.generate(
	        input_ids,
	        attention_mask=attention_mask,
	        max_new_tokens=int(max_new_tokens),
	        min_new_tokens=4,
	        do_sample=True,
	        temperature=float(temperature),
	        top_p=float(top_p),
	        top_k=int(top_k),
	        repetition_penalty=float(repetition_penalty),
	        pad_token_id=tokenizer.pad_token_id,
	        eos_token_id=tokenizer.eos_token_id,
	    )

	    # Decode only what was generated after the prompt, so nothing the user
	    # typed can be mistaken for part of the reply.
	    reply = tokenizer.batch_decode(
	        output_ids[:, prompt_len:],
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=True,
	    )[0]

	    # Keep the first assistant turn only and drop leftover turn markers.
	    reply = reply.split("[\|Human\|]")[0]
	    return reply.replace("[\|AI\|]", "").strip()


	@spaces.GPU()
	def generate_response(input_data, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty):
	    """Produce the assistant reply for the chat interface.

	    Args:
	        input_data: The new user message. A mapping with a ``"text"`` key is
	            also accepted, because the interface examples use that shape.
	        chat_history: Earlier messages as ``role``/``content`` mappings, or a
	            falsy value when there is no history.
	        max_new_tokens: Upper bound on the number of generated tokens.
	        temperature: Sampling temperature.
	        top_p: Nucleus sampling threshold.
	        top_k: Number of highest-probability tokens kept for sampling.
	        repetition_penalty: Penalty applied to repeated tokens.

	    Yields:
	        Exactly one string: the generated reply, or a fixed fallback message
	        when generation fails or produces no text.
	    """
	    # NOTE(review): this fallback is a greeting, so a failed generation is
	    # not visible to the user as an error; the traceback is only printed.
	    fallback = "أهلاً! أنا أليكس مساعد خدمة العملاء. كيف أقدر أساعدك اليوم؟"
	    speaker_tags = {"user": "[\|Human\|]", "assistant": "[\|AI\|]"}

	    if isinstance(input_data, dict):
	        input_data = input_data.get("text", "")

	    # Build conversation for Iraqi model format: one tagged line per turn,
	    # history first, then the current user message. Other roles are skipped.
	    conversation_parts = []
	    for message in format_conversation_history(chat_history or []):
	        tag = speaker_tags.get(message["role"])
	        if tag is not None:
	            conversation_parts.append(f"{tag} {message['content']}")
	    conversation_parts.append(f"{speaker_tags['user']} {input_data}")
	    conversation = "\n".join(conversation_parts)

	    # Create full prompt using the Iraqi Arabic prompt template
	    full_prompt = prompt_ar.format(Question=conversation)

	    try:
	        # Forward the sampling settings chosen in the UI; previously they
	        # were accepted here but never reached model.generate().
	        response = _sample_reply(
	            full_prompt,
	            max_new_tokens,
	            temperature,
	            top_p,
	            top_k,
	            repetition_penalty,
	        )
	    except Exception as e:
	        # Best effort: log the failure and answer with the fallback message
	        # instead of propagating the exception to the interface.
	        print(f"Error in generate_response: {e}")
	        import traceback
	        print(traceback.format_exc())
	        response = ""

	    yield response or fallback

	# Chat UI. The five sliders are handed to generate_response after the message
	# and the history, in the order listed here. Their defaults match the
	# sampling settings hard-coded in get_response (256 new tokens, temperature
	# 0.2, top-p 0.8, repetition penalty 1.3) so the UI does not advertise
	# values different from the ones the model was set up with.
	demo = gr.ChatInterface(
	    fn=generate_response,
	    additional_inputs=[
	        gr.Slider(label="الحد الأقصى للكلمات الجديدة", minimum=64, maximum=4096, step=1, value=256),
	        gr.Slider(label="درجة الحرارة", minimum=0.1, maximum=2.0, step=0.1, value=0.2),
	        gr.Slider(label="Top-p", minimum=0.05, maximum=1.0, step=0.05, value=0.8),
	        gr.Slider(label="Top-k", minimum=1, maximum=100, step=1, value=50),
	        gr.Slider(label="عقوبة التكرار", minimum=1.0, maximum=2.0, step=0.05, value=1.3),
	    ],
	    examples=[
	        [{"text": "شرح لي كيف يشتغل الذكاء الاصطناعي"}],
	        [{"text": "قولي قصة قصيرة بالعراقي"}],
	        [{"text": "شنو رأيك بالوضع الاقتصادي الحالي؟"}],
	        [{"text": "ساعدني أفهم البرمجة"}],
	        [{"text": "أعطيني نصيحة للحياة اليومية"}],
	    ],
	    cache_examples=False,
	    type="messages",
	    title="ذكاء عراقي - Iraqi AI Assistant",
	    description="""🤖 ذكاء صناعي يتحدث باللهجة العراقية

	✨ المميزات:
	- 🇮🇶 لهجة عراقية أصيلة وطبيعية
	- 🧠 إجابات ذكية واحترافية
	- 💬 محادثات متنوعة بالعراقي
	- 🎯 مدعوم بـ موديل ذكي مع تحسينات الأداء

	احجي مع الذكاء الاصطناعي باللهجة العراقية في أي موضوع تريده.""",
	    fill_height=True,
	    textbox=gr.Textbox(
	        label="اكتب رسالتك هنا",
	        placeholder="مثال: شرح لي موضوع معقد..."
	    ),
	    stop_btn="إيقاف التوليد",
	    multimodal=False,
	    theme=gr.themes.Soft()
	)

	# Start the Gradio app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()