# arabic_rag_chat / app.py
# Author: Mohamed Dyab
# Last commit: d76d1b0 ("faster")
import gradio as gr
import os
import torch
import spaces
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_cohere import ChatCohere
from langchain_classic.chains import RetrievalQA
# 1. SETUP: DEFINING THE MODELS
# Multilingual embedding model used for retrieval.
embedding_model_name = "intfloat/multilingual-e5-large"

# Prefer CUDA when a GPU is present; fall back to CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Lazily-initialized embeddings singleton; populated by get_embeddings().
embeddings = None
def get_embeddings():
    """Return the shared HuggingFaceEmbeddings instance, building it on first use.

    Lazy construction keeps module import cheap and lets the model load on
    the device detected at startup.
    """
    global embeddings
    if embeddings is None:
        config = {
            "model_name": embedding_model_name,
            "model_kwargs": {"device": device},
        }
        embeddings = HuggingFaceEmbeddings(**config)
    return embeddings
@spaces.GPU(duration=120)  # ZeroGPU: allow up to 120s of GPU time per call
def process_and_chat(file, query):
    """Answer a question about an uploaded PDF using retrieval-augmented generation.

    Args:
        file: Uploaded PDF — Gradio 6.x passes a path string; older versions
            pass an object with a ``.name`` attribute.
        query: The user's question (the prompt asks the LLM to answer in Arabic).

    Returns:
        A Markdown string containing the answer and truncated source snippets,
        or a human-readable error message. Never raises to the UI.
    """
    try:
        if not file:
            return "Please upload a PDF file first."
        if not query:
            return "Please enter a question."

        # Check for API key before doing any expensive work.
        api_key = os.environ.get("COHERE_API_KEY")
        if not api_key:
            return "**Error:** COHERE_API_KEY environment variable is not set."

        # 2-3. LOAD, SPLIT, EMBED the document into a retriever.
        retriever = _build_retriever(file)
        if retriever is None:
            # Guard: a scanned/image-only PDF yields no chunks, and
            # FAISS.from_documents would fail with a cryptic error.
            return "**Error:** No text could be extracted from the PDF."

        # Initialize LLM (model id is Cohere "Command A", command-a-03-2025).
        llm = ChatCohere(model="command-a-03-2025", temperature=0.3, cohere_api_key=api_key)

        # 4. RAG CHAIN
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

        # 5. GENERATE ANSWER — steer the model toward grounded Arabic answers.
        augmented_query = (
            f"Answer the following question in Arabic based ONLY on the context "
            f"provided. If you don't know, say you don't know. Question: {query}"
        )
        response = qa_chain.invoke(augmented_query)
        return _format_answer(response)
    except Exception as e:
        # UI boundary: surface any failure as a message instead of crashing.
        return f"**Error:** {str(e)}"


def _build_retriever(file):
    """Load a PDF, chunk it, and index it into an in-memory FAISS retriever.

    Returns None when the document yields no text chunks.
    """
    # Gradio 6.x returns file path as string directly
    file_path = file if isinstance(file, str) else file.name
    documents = PyPDFLoader(file_path).load()

    # Split text into chunks (Arabic text needs careful splitting);
    # overlap preserves context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", "。", ".", " ", ""],
    )
    texts = text_splitter.split_documents(documents)
    if not texts:
        return None

    # In-memory FAISS is rebuilt per request — fast and needs no persistence.
    db = FAISS.from_documents(texts, get_embeddings())
    return db.as_retriever(search_kwargs={"k": 3})  # top-3 chunks keeps latency low


def _format_answer(response):
    """Render the chain's output dict as Markdown with 100-char source previews."""
    sources = [doc.page_content[:100] + "..." for doc in response['source_documents']]
    return f"**Answer:**\n{response['result']}\n\n**Sources:**\n" + "\n".join(sources)
# 6. BUILD UI
# Title/description corrected to match the models actually used above:
# the LLM is command-a-03-2025 (Command A, not Command R) and the
# embeddings are intfloat/multilingual-e5-large (not E5-Small).
iface = gr.Interface(
    fn=process_and_chat,
    inputs=[
        gr.File(label="Upload Arabic PDF"),
        gr.Textbox(
            label="Ask a question in Arabic",
            placeholder="ما هي النقاط الرئيسية في هذا المستند؟",
        ),
    ],
    outputs=gr.Markdown(),
    title="Arabic RAG (Cohere Command A)",
    description="Upload a PDF and ask questions. Powered by Cohere Command A and Multilingual-E5-Large embeddings.",
)

iface.launch(share=True)