"""AIChat-FaEnPro: a bilingual (Persian/English) Gradio assistant.

Routes each message to a Persian or an English causal LM based on script
detection, archives conversations as text files, voices every reply with
gTTS, and offers PDF/image/audio file processing plus text-to-image
generation via Stable Diffusion.
"""

import os
import re
import tempfile
import time  # kept: imported by the original file
from datetime import datetime

import gradio as gr
import speech_recognition as sr
import torch
from diffusers import StableDiffusionPipeline
from gtts import gTTS
from PIL import Image
from PyPDF2 import PdfReader
from transformers import AutoModelForCausalLM, AutoTokenizer

# ================== Initial setup ==================
CONVERSATIONS_DIR = "conversations"
os.makedirs(CONVERSATIONS_DIR, exist_ok=True)  # conversation archive folder

EN_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
FA_MODEL_NAME = "HooshvareLab/gpt2-fa"

# fp16 only when a GPU is available; fp16 on CPU is unsupported/slow.
_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the chat models once at startup.
en_tokenizer = AutoTokenizer.from_pretrained(EN_MODEL_NAME)
en_model = AutoModelForCausalLM.from_pretrained(EN_MODEL_NAME, torch_dtype=_DTYPE)
en_model.eval()

fa_tokenizer = AutoTokenizer.from_pretrained(FA_MODEL_NAME)
fa_model = AutoModelForCausalLM.from_pretrained(FA_MODEL_NAME)
fa_model.eval()

# Load the text-to-image pipeline.
image_pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=_DTYPE
)
image_pipe = image_pipe.to(_DEVICE)


# ================== Helper functions ==================
def detect_language(text):
    """Return True when *text* contains Arabic-script (Persian) characters."""
    return bool(re.search(r'[\u0600-\u06FF]', text))


def save_conversation(history, file_path):
    """Write (user, bot) turn pairs to *file_path*, one blank-line-separated
    "User: ...\\nAssistant: ..." record per turn."""
    with open(file_path, "w", encoding="utf-8") as f:
        for user, bot in history:
            f.write(f"User: {user}\nAssistant: {bot}\n\n")


def load_conversation(file_path):
    """Parse a file written by save_conversation back into (user, bot) pairs.

    Fix: the original required exactly two lines per record, silently
    dropping any turn whose assistant reply contained a newline; here the
    trailing lines are re-joined so the round trip is lossless.
    """
    history = []
    with open(file_path, encoding="utf-8") as f:
        records = f.read().strip().split("\n\n")
    for record in records:
        if not record.strip():
            continue
        lines = record.split("\n")
        if len(lines) >= 2:
            user = lines[0].replace("User: ", "")
            bot = "\n".join(lines[1:]).replace("Assistant: ", "")
            history.append((user, bot))
    return history


def list_conversations():
    """Return saved conversation filenames, newest first (by name)."""
    files = [f for f in os.listdir(CONVERSATIONS_DIR) if f.endswith(".txt")]
    files.sort(reverse=True)
    return files


# ================== Main chat function ==================
def chat_with_bot(message, history, selected_file):
    """Generate a reply, persist the conversation, and voice the answer.

    Args:
        message: the user's new message.
        history: list of (user, bot) pairs so far (may be None).
        selected_file: archive filename to append to, or "" for a new one.

    Returns:
        (chatbot history, state history, audio path, archive choices,
        active archive filename) — matching the Gradio output bindings.
    """
    is_farsi = detect_language(message)
    history = history or []

    # Rebuild the running transcript as the model prompt.
    prompt_parts = [f"User: {u}\nAssistant: {b}\n" for u, b in history]
    prompt_parts.append(f"User: {message}\nAssistant:")
    full_prompt = "".join(prompt_parts)

    if is_farsi:
        tokenizer, model, max_new = fa_tokenizer, fa_model, 100
    else:
        tokenizer, model, max_new = en_tokenizer, en_model, 150

    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True)
    # no_grad: pure inference — skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text after the final "Assistant:" marker.
    response = response.split("Assistant:")[-1].strip()

    history.append((message, response))

    file_name = selected_file or datetime.now().strftime("%Y-%m-%d_%H-%M") + ".txt"
    save_conversation(history, os.path.join(CONVERSATIONS_DIR, file_name))

    # Voice the reply. gTTS raises on empty text, so fall back to a space.
    tts = gTTS(text=response or " ", lang='fa' if is_farsi else 'en')
    # mkstemp + close instead of NamedTemporaryFile(delete=False): the
    # original leaked an open descriptor and blocks the write on Windows.
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(audio_path)

    return history, history, audio_path, list_conversations(), file_name


# ================== File processing ==================
def handle_pdf(file):
    """Extract a PDF's text and return a ~1000-character preview."""
    reader = PdfReader(file.name)
    # Extract each page once (the original called extract_text twice/page).
    page_texts = (page.extract_text() for page in reader.pages)
    text = "\n".join(t for t in page_texts if t)
    return text[:1000] + ("..." if len(text) > 1000 else "")


def handle_image(file):
    """OCR an image in Persian + English via Tesseract."""
    import pytesseract  # local import: optional system dependency

    image = Image.open(file.name)
    return pytesseract.image_to_string(image, lang='fas+eng')


def handle_audio(file):
    """Transcribe an audio file with Google speech recognition (fa-IR)."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(file.name) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data, language="fa-IR")
    except (sr.UnknownValueError, sr.RequestError):
        # Narrowed from a bare except: keep the user-facing fallback
        # message instead of crashing the UI, but let real errors surface.
        text = "صدا قابل شناسایی نبود."
    return text


def handle_file(file):
    """Dispatch an uploaded file to the right handler by extension.

    Fix: the original checked ``file.type``, an attribute Gradio file
    objects do not reliably expose (AttributeError for non-PDF uploads);
    extension-based dispatch is robust.
    """
    lower = file.name.lower()
    if lower.endswith(".pdf"):
        return handle_pdf(file)
    if lower.endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff", ".webp")):
        return handle_image(file)
    return handle_audio(file)


def generate_image(prompt):
    """Run the Stable Diffusion pipeline and return the first image."""
    return image_pipe(prompt).images[0]


# ================== User interface ==================
with gr.Blocks() as demo:
    gr.Markdown("# AIChat-FaEnPro | دستیار هوشمند فارسی-انگلیسی")
    with gr.Row():
        with gr.Column():
            chatbot = gr.Chatbot()
            msg = gr.Textbox(label="پیام شما")
            # NOTE(review): `source=` is the Gradio 3.x parameter name
            # (4.x renamed it to `sources=[...]`) — confirm installed version.
            audio_in = gr.Audio(source="microphone", type="filepath", label="ورودی صوتی")
            submit = gr.Button("ارسال")
            clear = gr.Button("پاک کردن گفتگو")
            archive_dropdown = gr.Dropdown(
                label="گفتگوهای ذخیره‌شده",
                choices=list_conversations(),
                interactive=True,
            )
        with gr.Column():
            file_input = gr.File(label="آپلود فایل (PDF/تصویر/صدا)")
            file_output = gr.Textbox(label="نتیجه پردازش فایل")
            image_prompt = gr.Textbox(label="دستور تولید تصویر")
            image_out = gr.Image(label="تصویر تولید شده")
            audio_out = gr.Audio(label="صدای پاسخ")

    state = gr.State([])
    file_state = gr.State("")

    chat_outputs = [chatbot, state, audio_out, archive_dropdown, file_state]

    submit.click(chat_with_bot, [msg, state, file_state], chat_outputs)
    clear.click(
        lambda: ([], [], None, list_conversations(), ""),
        None,
        chat_outputs,
    )
    # Fix: also feed the loaded history into the visible chatbot — the
    # original only updated the hidden state, so picking an archive
    # showed nothing on screen.
    archive_dropdown.change(
        lambda f: (
            load_conversation(os.path.join(CONVERSATIONS_DIR, f)),
            load_conversation(os.path.join(CONVERSATIONS_DIR, f)),
            f,
        ),
        [archive_dropdown],
        [chatbot, state, file_state],
    )
    # Voice input: transcribe, then run through the normal chat path.
    audio_in.change(
        lambda f, h, s: chat_with_bot(handle_audio(f), h, s),
        [audio_in, state, file_state],
        chat_outputs,
    )
    file_input.change(handle_file, file_input, file_output)
    image_prompt.submit(generate_image, image_prompt, image_out)

    gr.Markdown("طراحی شده توسط شما، با قدرت ChatGPT")

if __name__ == "__main__":
    demo.launch()