# ArabEdu / app.py
# Repository page header (not code): Alaa16's picture; "Upload 2 files"; 04b4868 verified.
import os
import numpy as np
import faiss
import torch
import gradio as gr
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from sentence_transformers import SentenceTransformer
import librosa
device = "cpu"
# --------------- Load Models ---------------
# `device` above is the single device selector shared by both pipelines below,
# so changing it moves speech recognition and emotion detection together.

# Whisper speech recognizer; long audio is processed in 30-second chunks.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    chunk_length_s=30,
    device=device,
)
# Decoder prompt ids requesting Arabic transcription; passed to the ASR
# pipeline through `generate_kwargs` in transcribe_audio().
forced_decoder_ids = asr_pipeline.tokenizer.get_decoder_prompt_ids(
    language="arabic", task="transcribe"
)

# Sequence-to-sequence model and tokenizer used by summarize_text().
summ_model_name = "csebuetnlp/mT5_multilingual_XLSum"
summ_tokenizer = AutoTokenizer.from_pretrained(summ_model_name)
summ_model = AutoModelForSeq2SeqLM.from_pretrained(summ_model_name)

# Sentence encoder for semantic search; its output size fixes the FAISS
# index dimensionality.
embedding_model = SentenceTransformer("intfloat/multilingual-e5-base")
embedding_dim = embedding_model.get_sentence_embedding_dimension()

# Audio emotion classifier. It previously hard-coded `device=-1`; it now
# follows the shared `device` setting like the ASR pipeline.
emotion_classifier = pipeline(
    "audio-classification",
    model="Dpngtm/wav2vec2-emotion-recognition",
    device=device,
)
# --------------- FAISS Index ---------------
# Inner-product index over segment embeddings; replaced by index_segments().
index = faiss.IndexFlatIP(embedding_dim)
# Transcript chunks whose positions correspond to the vectors added to `index`.
text_segments = []
# Arabic domain terms searched for in transcripts, mapped to an English gloss.
# The keys were stored as mojibake (UTF-8 bytes decoded with a Thai/Latin code
# page) and could never match a real Arabic transcript; they are restored here.
KEYWORDS = {
    "ذكاء اصطناعي": "AI",
    "تعلم عميق": "Deep Learning",
    "شبكة عصبية": "Neural Network",
    "تعلم آلي": "Machine Learning",
    "معالجة اللغات": "NLP",
    "رؤية حاسوبية": "Computer Vision",
    "بيانات": "Data",
    "نموذج": "Model",
    "تدريب": "Training",
    "خوارزمية": "Algorithm",
    "تصنيف": "Classification",
    "استرجاع": "Retrieval",
    "تحليل": "Analysis",
    "محاضرة": "Lecture",
    "جامعة": "University",
    "بحث": "Research",
    "مشروع": "Project",
}
# Emoji shown next to each (lower-cased) emotion label.
# The values were stored as mojibake; the intended emoji are restored here.
EMOTION_ICONS = {
    "happy": "😊",
    "sad": "😢",
    "angry": "😡",
    "neutral": "😐",
    "calm": "😌",
    "fearful": "😨",
    "disgust": "🤢",
    "surprised": "😲",
}
# --------------- Pipeline Functions ---------------
def encode_texts(texts, prefix="passage: "):
    """Embed `texts` with the sentence encoder after prepending `prefix`.

    Args:
        texts: Iterable of strings to embed.
        prefix: String prepended to every text before encoding; callers in
            this file use "passage: " for segments and "query: " for queries.

    Returns:
        The encoder output (requested with ``normalize_embeddings=True``)
        converted to a float32 NumPy array.
    """
    tagged_inputs = []
    for text in texts:
        tagged_inputs.append(prefix + text)
    vectors = embedding_model.encode(tagged_inputs, normalize_embeddings=True)
    as_array = np.array(vectors)
    return as_array.astype("float32")
def transcribe_audio(audio_path):
    """Run the ASR pipeline on an audio file.

    Args:
        audio_path: Path of the audio file handed to the ASR pipeline.

    Returns:
        A ``(full_text, chunks)`` tuple. ``chunks`` is the pipeline's
        timestamped chunk list; when that list is missing or empty, a single
        chunk holding the whole text with a ``(0.0, 0.0)`` timestamp is
        substituted so callers always receive at least one segment.
    """
    decoding_options = {"forced_decoder_ids": forced_decoder_ids}
    output = asr_pipeline(
        audio_path,
        return_timestamps=True,
        generate_kwargs=decoding_options,
    )
    transcript = output["text"]
    segments = output.get("chunks", [])
    if segments:
        return transcript, segments
    # No timestamped chunks came back: fall back to one whole-text segment.
    return transcript, [{"text": transcript, "timestamp": (0.0, 0.0)}]
def summarize_text(text, max_input=512, max_output=150):
    """Summarize `text` with the sequence-to-sequence summarization model.

    Args:
        text: Text to summarize.
        max_input: Maximum number of input tokens; longer input is truncated.
        max_output: ``max_length`` passed to generation for the summary.

    Returns:
        The decoded summary with special tokens removed, or an empty string
        when `text` is empty or whitespace-only (for example, a silent
        recording) so the model is not asked to summarize nothing.
    """
    cleaned = text.strip() if text else ""
    if not cleaned:
        return ""
    inputs = summ_tokenizer(
        [cleaned],
        max_length=max_input,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    summary_ids = summ_model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        num_beams=2,
        max_length=max_output,
        early_stopping=True,
        no_repeat_ngram_size=3,
    )
    return summ_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
def detect_emotion(audio_path):
    """Classify the emotion of the opening of an audio file.

    Only the first 15 seconds are loaded, resampled to 16 kHz, and passed to
    the emotion classifier.

    Args:
        audio_path: Path of the audio file to analyze.

    Returns:
        A ``(label, score)`` tuple for the highest-scoring prediction.
    """
    def by_score(prediction):
        return prediction["score"]

    waveform, sample_rate = librosa.load(audio_path, sr=16000, duration=15.0)
    classifier_input = {"array": waveform, "sampling_rate": sample_rate}
    best = max(emotion_classifier(classifier_input), key=by_score)
    return best["label"], best["score"]
def detect_keywords(text):
    """Count occurrences of each known keyword in `text`.

    Args:
        text: Transcript to scan.

    Returns:
        A list of ``{"keyword_ar", "keyword_en", "count"}`` dicts, one per
        keyword that occurs at least once, ordered by descending count.
    """
    occurrences = (
        (arabic, english, text.count(arabic))
        for arabic, english in KEYWORDS.items()
    )
    hits = [
        {"keyword_ar": arabic, "keyword_en": english, "count": times}
        for arabic, english, times in occurrences
        if times > 0
    ]
    return sorted(hits, key=lambda hit: hit["count"], reverse=True)
def index_segments(chunks):
    """Rebuild the global search index from transcript chunks.

    The new index is built in a local variable and the module-level `index`
    and `text_segments` are replaced together only after embedding succeeded,
    so a failure while encoding leaves the previous searchable state intact
    instead of an empty index paired with new segments.

    Args:
        chunks: Iterable of dicts with at least a "text" key.

    Returns:
        The number of indexed segments (0 for an empty input, which yields an
        empty index rather than an error from adding a malformed array).
    """
    global index, text_segments
    # Copy so later mutation of the caller's list cannot desync the index.
    segments = list(chunks)
    fresh_index = faiss.IndexFlatIP(embedding_dim)
    if segments:
        segment_texts = [segment["text"] for segment in segments]
        fresh_index.add(encode_texts(segment_texts, prefix="passage: "))
    index, text_segments = fresh_index, segments
    return len(segments)
def _format_clock(seconds):
    """Render a duration in seconds as ``M:SS``; ``None`` is treated as zero."""
    seconds = seconds or 0.0
    return f"{int(seconds // 60)}:{int(seconds % 60):02d}"


def search_query(query, top_k=3):
    """Return the indexed segments most similar to `query` as Markdown.

    Args:
        query: Search text; embedded with the "query: " prefix.
        top_k: Maximum number of results (capped at the index size).

    Returns:
        A Markdown string with one entry per hit (rank, score as a
        percentage, time span, quoted segment text), or an Arabic notice when
        nothing is indexed yet or no result is available.
    """
    if index.ntotal == 0:
        return "لم يتم تحميل أي ملف صوتي بعد. قم برفع ملف أولاً."
    k = min(top_k, index.ntotal)
    if k < 1:
        # A non-positive top_k cannot yield hits; skip the search call.
        return "لا توجد نتائج"
    query_emb = encode_texts([query], prefix="query: ")
    scores, indices = index.search(query_emb, k=k)
    results = []
    for rank, (i, score) in enumerate(zip(indices[0], scores[0]), 1):
        # FAISS reports -1 for missing neighbours; a negative value must not
        # be used as a Python index (it would wrap to the end of the list).
        if not 0 <= i < len(text_segments):
            continue
        seg = text_segments[i]
        time_str = (
            f"{_format_clock(seg['timestamp'][0])} - "
            f"{_format_clock(seg['timestamp'][1])}"
        )
        results.append(
            f"**#{rank}** | تطابق: {score * 100:.1f}% | ⏱️ {time_str}\n> {seg['text']}"
        )
    return "\n\n".join(results) if results else "لا توجد نتائج"
# --------------- Main Process ---------------
def process_audio(audio_path, progress=gr.Progress()):
    """Run the full analysis pipeline on an uploaded audio file.

    Steps, in order: emotion detection, transcription, summarization,
    semantic indexing of the transcript chunks, and keyword detection.
    Progress is reported through `progress` before each step.

    The user-facing Arabic strings were stored as mojibake and are restored
    here so the interface shows readable text.

    Args:
        audio_path: Path of the uploaded audio file, or None.
        progress: Gradio progress tracker (default supplied for Gradio).

    Returns:
        A tuple ``(emotion_result, full_text, summary, kw_text, seg_info)``
        matching the five output components wired to the process button.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if audio_path is None:
        raise gr.Error("يرجى رفع ملف صوتي أولاً")

    progress(0.05, desc="تحليل المشاعر...")
    emotion_label, emotion_conf = detect_emotion(audio_path)
    # Labels are looked up lower-cased; unknown labels get a generic icon.
    icon = EMOTION_ICONS.get(emotion_label.lower(), "🎵")
    emotion_result = f"{icon} {emotion_label} ({emotion_conf * 100:.1f}%)"

    progress(0.25, desc="تحويل الصوت إلى نص...")
    full_text, chunks = transcribe_audio(audio_path)

    progress(0.60, desc="إنشاء الملخص...")
    summary = summarize_text(full_text)

    progress(0.80, desc="فهرسة المقاطع...")
    n_segments = index_segments(chunks)

    progress(0.90, desc="استخراج الكلمات المفتاحية...")
    keywords = detect_keywords(full_text)
    kw_text = " ".join(
        f"🔑 {k['keyword_ar']} ({k['keyword_en']}) ×{k['count']}" for k in keywords
    )
    if not kw_text:
        kw_text = "لم يتم العثور على كلمات مفتاحية"

    seg_info = f"✅ تم فهرسة {n_segments} مقطع للبحث الدلالي"
    progress(1.0, desc="تم!")
    return emotion_result, full_text, summary, kw_text, seg_info
def do_search(query):
    """Validate the search box input and run a top-5 semantic search.

    Args:
        query: Raw text from the search box; may be None or blank.

    Returns:
        An Arabic prompt asking for a query when the input is empty or
        whitespace-only, otherwise the Markdown produced by search_query().
        The prompt text was stored as mojibake and is restored here.
    """
    if not query or not query.strip():
        return "يرجى إدخال استعلام للبحث"
    return search_query(query.strip(), top_k=5)
# --------------- Gradio UI ---------------
# Stylesheet passed to gr.Blocks(css=...): caps the container width and sets
# its font, and styles the `.main-title` (gradient-filled text) and
# `.sub-title` classes used by the HTML header.
CUSTOM_CSS = """
.gradio-container {
max-width: 1200px !important;
font-family: 'Inter', sans-serif !important;
}
.main-title {
text-align: center;
background: linear-gradient(135deg, #49f4c8, #7c3aed);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 2.5rem;
font-weight: 800;
margin-bottom: 0.5rem;
}
.sub-title {
text-align: center;
color: #a0abc2;
font-size: 1.1rem;
margin-bottom: 2rem;
}
"""
# Gradio interface: an upload/processing area, result panels, and a semantic
# search box. The Arabic labels and emoji were stored as mojibake and are
# restored here so the interface shows readable text.
with gr.Blocks(
    theme=gr.themes.Base(
        primary_hue=gr.themes.colors.emerald,
        secondary_hue=gr.themes.colors.purple,
        neutral_hue=gr.themes.colors.slate,
        font=gr.themes.GoogleFont("Inter"),
    ),
    css=CUSTOM_CSS,
    title="ArabEdu",
) as demo:
    # Page header styled by the .main-title / .sub-title rules in CUSTOM_CSS.
    gr.HTML(
        """
        <div class="main-title">ArabEdu</div>
        <div class="sub-title">
        نظام فهم المحاضرات العربية — حوّل محاضراتك الصوتية إلى نصوص ذكية وملخصات دقيقة
        </div>
        """
    )

    # --- Input ---
    with gr.Row():
        audio_input = gr.Audio(
            label="📁 رفع الملف الصوتي",
            type="filepath",
            sources=["upload", "microphone"],
        )
    process_btn = gr.Button(
        "🚀 معالجة الملف الصوتي",
        variant="primary",
        size="lg",
    )

    # --- Results ---
    with gr.Row():
        emotion_output = gr.Textbox(
            label="🎭 تحليل المشاعر الصوتية",
            interactive=False,
            scale=1,
        )
    with gr.Row():
        with gr.Column(scale=2):
            transcript_output = gr.Textbox(
                label="📝 النص الكامل",
                interactive=False,
                lines=10,
                rtl=True,
            )
        with gr.Column(scale=1):
            summary_output = gr.Textbox(
                label="📋 الملخص",
                interactive=False,
                lines=6,
                rtl=True,
            )
            keywords_output = gr.Textbox(
                label="🔑 الكلمات المفتاحية",
                interactive=False,
                lines=3,
                rtl=True,
            )
    seg_info_output = gr.Textbox(
        label="فهرسة",
        interactive=False,
        visible=True,
    )

    # --- Semantic search ---
    gr.Markdown("---")
    gr.Markdown("### 🔍 البحث الدلالي في المحتوى")
    with gr.Row():
        search_input = gr.Textbox(
            label="ابحث عن موضوع معين في التسجيل",
            placeholder="مثال: ما هو الذكاء الاصطناعي؟",
            scale=4,
            rtl=True,
        )
        search_btn = gr.Button("🔍 بحث", variant="secondary", scale=1)
    search_output = gr.Markdown(label="نتائج البحث", rtl=True)

    # --- Event wiring ---
    # Output order must match the tuple returned by process_audio().
    process_btn.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[
            emotion_output,
            transcript_output,
            summary_output,
            keywords_output,
            seg_info_output,
        ],
    )
    # Clicking the button and pressing Enter in the box both run the search.
    search_btn.click(
        fn=do_search,
        inputs=[search_input],
        outputs=[search_output],
    )
    search_input.submit(
        fn=do_search,
        inputs=[search_input],
        outputs=[search_output],
    )

demo.queue()
demo.launch()