# Mine / app.py
# abolfsaidi — "Update app.py" (commit 5849d20, verified)
# (Hugging Face Space page header preserved as comments so the file parses.)
import gradio as gr
import os
import re
import time
import torch
import tempfile
from datetime import datetime
from transformers import AutoTokenizer, AutoModelForCausalLM
from gtts import gTTS
from PIL import Image
from diffusers import StableDiffusionPipeline
from PyPDF2 import PdfReader
import speech_recognition as sr
# ================== Initial setup ==================
os.makedirs("conversations", exist_ok=True)  # archive folder for saved conversations

# Model identifiers: an English chat model and a Farsi GPT-2.
en_model_name = "HuggingFaceH4/zephyr-7b-beta"
fa_model_name = "HooshvareLab/gpt2-fa"

# Load models once at import time. fp16 only when CUDA is available; fp32 on CPU.
en_tokenizer = AutoTokenizer.from_pretrained(en_model_name)
en_model = AutoModelForCausalLM.from_pretrained(en_model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
en_model.eval()  # inference mode (disables dropout etc.)

fa_tokenizer = AutoTokenizer.from_pretrained(fa_model_name)
fa_model = AutoModelForCausalLM.from_pretrained(fa_model_name)
fa_model.eval()

# Text-to-image pipeline (Stable Diffusion v1.4), moved to GPU when present.
# NOTE(review): the English model is never moved to CUDA even when available — confirm intended.
image_pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
image_pipe = image_pipe.to("cuda" if torch.cuda.is_available() else "cpu")
# ================== Helper functions ==================
def detect_language(text):
    """Return True when *text* contains at least one character in the
    Arabic Unicode block (U+0600-U+06FF), i.e. it looks like Farsi."""
    return any("\u0600" <= ch <= "\u06FF" for ch in text)
def save_conversation(history, file_path):
    """Write a chat history to *file_path* as blank-line-separated
    "User: ...\\nAssistant: ..." turns (UTF-8)."""
    turns = [f"User: {user_msg}\nAssistant: {bot_msg}\n\n"
             for user_msg, bot_msg in history]
    with open(file_path, "w", encoding="utf-8") as out:
        out.write("".join(turns))
def load_conversation(file_path):
    """Parse a conversation archive written by save_conversation.

    Returns a list of (user, bot) tuples. Fixes over the original:
    - turns whose assistant reply spans multiple lines were dropped
      (the old code required exactly two lines per turn); now the extra
      lines are rejoined into the bot message.
    - str.replace removed "User: "/"Assistant: " anywhere in the text,
      corrupting messages that contain those substrings; removeprefix
      only strips the leading marker.
    """
    history = []
    with open(file_path, encoding="utf-8") as f:
        turns = f.read().strip().split("\n\n")
    for turn in turns:
        if not turn.strip():
            continue
        parts = turn.split("\n")
        if len(parts) >= 2:  # need at least a user line and a bot line
            user = parts[0].removeprefix("User: ")
            bot = "\n".join(parts[1:]).removeprefix("Assistant: ")
            history.append((user, bot))
    return history
def list_conversations():
    """Return saved conversation filenames (*.txt), newest first.

    Robustness fix: the original raised FileNotFoundError when the
    "conversations" folder did not exist (e.g. first run before the
    module-level makedirs, or the folder was deleted); now an empty
    list is returned instead.
    """
    try:
        files = os.listdir("conversations")
    except FileNotFoundError:
        return []
    # Filenames start with a sortable timestamp, so reverse lexical
    # order is newest-first.
    return sorted((f for f in files if f.endswith(".txt")), reverse=True)
# ================== Main chat function ==================
def chat_with_bot(message, history, selected_file):
    """Generate the assistant's next reply plus a spoken (mp3) version.

    Args:
        message: the user's new utterance (Farsi or English).
        history: list of (user, bot) tuples so far; may be None.
        selected_file: archive filename to append to ("" starts a new one).

    Returns:
        (history, history, audio_path, archive_choices, archive_filename),
        matching the Gradio outputs [chatbot, state, audio_out,
        archive_dropdown, file_state].

    Fixes over the original: the two near-identical generate branches are
    merged; generation runs under torch.no_grad() (pure inference); and the
    NamedTemporaryFile descriptor is closed before gTTS writes to the path
    (the original leaked the open handle, which also fails on Windows).
    """
    is_farsi = detect_language(message)
    history = history or []

    # Rebuild the whole transcript as a flat prompt for the causal LM.
    full_prompt = "".join(f"User: {u}\nAssistant: {b}\n" for u, b in history)
    full_prompt += f"User: {message}\nAssistant:"

    # Route to the language-appropriate model; Farsi gets a shorter budget.
    if is_farsi:
        tokenizer, model, max_tokens = fa_tokenizer, fa_model, 100
    else:
        tokenizer, model, max_tokens = en_tokenizer, en_model, 150

    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only — no autograd graph needed
        outputs = model.generate(**inputs, max_new_tokens=max_tokens,
                                 pad_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text echoes the prompt; keep only the newest answer.
    response = response.split("Assistant:")[-1].strip()

    history.append((message, response))
    # Reuse the selected archive file, or start a timestamped one.
    timestamp = selected_file if selected_file else datetime.now().strftime("%Y-%m-%d_%H-%M") + ".txt"
    save_conversation(history, os.path.join("conversations", timestamp))

    # Text-to-speech for the reply; close the temp handle before gTTS saves.
    tts = gTTS(text=response, lang='fa' if is_farsi else 'en')
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name
    tts.save(audio_path)

    return history, history, audio_path, list_conversations(), timestamp
# ================== File processing ==================
def handle_pdf(file):
    """Extract the text of an uploaded PDF and return a preview capped at
    1000 characters (an ellipsis marks truncation)."""
    reader = PdfReader(file.name)
    page_texts = (page.extract_text() for page in reader.pages)
    text = "\n".join(t for t in page_texts if t)
    if len(text) > 1000:
        return text[:1000] + "..."
    return text
def handle_image(file):
    """Run OCR (Farsi + English) on an uploaded image and return the text."""
    # Local import: pytesseract is a heavy, optional dependency.
    import pytesseract
    img = Image.open(file.name)
    return pytesseract.image_to_string(img, lang='fas+eng')
def handle_audio(file):
    """Transcribe an uploaded audio file with Google speech recognition
    (Farsi locale).

    Returns the recognized text, or a Farsi "audio not recognized" message
    when recognition fails. Fix: the original bare `except:` swallowed
    every exception (including KeyboardInterrupt/SystemExit); only the
    library's documented recognition errors are caught now.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(file.name) as source:
        audio_data = recognizer.record(source)
    try:
        # UnknownValueError: speech unintelligible; RequestError: API unreachable.
        return recognizer.recognize_google(audio_data, language="fa-IR")
    except (sr.UnknownValueError, sr.RequestError):
        return "صدا قابل شناسایی نبود."
def generate_image(prompt):
    """Render one image from a text prompt via the module-level Stable
    Diffusion pipeline."""
    result = image_pipe(prompt)
    return result.images[0]
# ================== Gradio UI ==================
with gr.Blocks() as demo:
    gr.Markdown("# AIChat-FaEnPro | دستیار هوشمند فارسی-انگلیسی")
    with gr.Row():
        with gr.Column():
            chatbot = gr.Chatbot()
            msg = gr.Textbox(label="پیام شما")  # "Your message"
            # NOTE(review): the `source=` kwarg was removed in Gradio 4.x
            # (replaced by `sources=[...]`) — confirm the pinned gradio version.
            audio_in = gr.Audio(source="microphone", type="filepath", label="ورودی صوتی")
            submit = gr.Button("ارسال")  # "Send"
            clear = gr.Button("پاک کردن گفتگو")  # "Clear conversation"
            archive_dropdown = gr.Dropdown(label="گفتگوهای ذخیره‌شده", choices=list_conversations(), interactive=True)
        with gr.Column():
            file_input = gr.File(label="آپلود فایل (PDF/تصویر/صدا)")  # upload PDF/image/audio
            file_output = gr.Textbox(label="نتیجه پردازش فایل")  # file-processing result
            image_prompt = gr.Textbox(label="دستور تولید تصویر")  # image-generation prompt
            image_out = gr.Image(label="تصویر تولید شده")  # generated image
    audio_out = gr.Audio(label="صدای پاسخ")  # spoken reply
    state = gr.State([])       # chat history: list of (user, bot) tuples
    file_state = gr.State("")  # filename of the active conversation archive

    # Event wiring. chat_with_bot returns 5 values matching these output lists.
    submit.click(chat_with_bot, [msg, state, file_state], [chatbot, state, audio_out, archive_dropdown, file_state])
    # Clearing resets chat, state, audio and the active-archive filename.
    clear.click(lambda: ([], [], None, list_conversations(), ""), None, [chatbot, state, audio_out, archive_dropdown, file_state])
    # Selecting an archive loads its history into state and tracks its name.
    # NOTE(review): this updates `state` but not the visible `chatbot` — confirm intended.
    archive_dropdown.change(lambda f: (load_conversation(os.path.join("conversations", f)), f), [archive_dropdown], [state, file_state])
    # Voice input: transcribe, then feed the text through the normal chat path.
    audio_in.change(lambda f, h, s: chat_with_bot(handle_audio(f), h, s), [audio_in, state, file_state], [chatbot, state, audio_out, archive_dropdown, file_state])
    # Dispatch uploads by extension/MIME type.
    # NOTE(review): relies on `f.type` — confirm the Gradio file object exposes
    # a MIME `type` attribute in the pinned version; otherwise this raises.
    file_input.change(lambda f: handle_pdf(f) if f.name.endswith(".pdf") else handle_image(f) if f.type.startswith("image") else handle_audio(f), file_input, file_output)
    image_prompt.submit(generate_image, image_prompt, image_out)
    gr.Markdown("طراحی شده توسط شما، با قدرت ChatGPT")
demo.launch()