# arabic_rag_chat / app.py
# Author: Mohamed Dyab
# Last commit: d76d1b0 ("faster")
import gradio as gr
import os
import torch
import spaces
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_cohere import ChatCohere
from langchain_classic.chains import RetrievalQA
# 1. SETUP: DEFINING THE MODELS
# Multilingual embedding model used for retrieval.
embedding_model_name = "intfloat/multilingual-e5-large"

# Prefer CUDA when a GPU is present; fall back to CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Lazily-initialized embeddings singleton; populated by get_embeddings().
embeddings = None
def get_embeddings():
    """Return the shared HuggingFaceEmbeddings instance, building it on first use.

    Lazy construction keeps module import cheap and lets the model load on
    the device detected at startup.
    """
    global embeddings
    if embeddings is None:
        config = {
            "model_name": embedding_model_name,
            "model_kwargs": {"device": device},
        }
        embeddings = HuggingFaceEmbeddings(**config)
    return embeddings
@spaces.GPU(duration=120)  # ZeroGPU: allow up to 120s of GPU time per call
def process_and_chat(file, query):
    """Answer a question about an uploaded PDF using retrieval-augmented generation.

    Args:
        file: Uploaded PDF — Gradio 6.x passes a path string; older versions
            pass an object with a ``.name`` attribute.
        query: The user's question (the prompt asks the LLM to answer in Arabic).

    Returns:
        A Markdown string containing the answer and truncated source snippets,
        or a human-readable error message. Never raises to the UI.
    """
    try:
        if not file:
            return "Please upload a PDF file first."
        if not query:
            return "Please enter a question."

        # Check for API key before doing any expensive work.
        api_key = os.environ.get("COHERE_API_KEY")
        if not api_key:
            return "**Error:** COHERE_API_KEY environment variable is not set."

        # 2-3. LOAD, SPLIT, EMBED the document into a retriever.
        retriever = _build_retriever(file)
        if retriever is None:
            # Guard: a scanned/image-only PDF yields no chunks, and
            # FAISS.from_documents would fail with a cryptic error.
            return "**Error:** No text could be extracted from the PDF."

        # Initialize LLM (model id is Cohere "Command A", command-a-03-2025).
        llm = ChatCohere(model="command-a-03-2025", temperature=0.3, cohere_api_key=api_key)

        # 4. RAG CHAIN
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

        # 5. GENERATE ANSWER — steer the model toward grounded Arabic answers.
        augmented_query = (
            f"Answer the following question in Arabic based ONLY on the context "
            f"provided. If you don't know, say you don't know. Question: {query}"
        )
        response = qa_chain.invoke(augmented_query)
        return _format_answer(response)
    except Exception as e:
        # UI boundary: surface any failure as a message instead of crashing.
        return f"**Error:** {str(e)}"


def _build_retriever(file):
    """Load a PDF, chunk it, and index it into an in-memory FAISS retriever.

    Returns None when the document yields no text chunks.
    """
    # Gradio 6.x returns file path as string directly
    file_path = file if isinstance(file, str) else file.name
    documents = PyPDFLoader(file_path).load()

    # Split text into chunks (Arabic text needs careful splitting);
    # overlap preserves context across chunk boundaries.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", "。", ".", " ", ""],
    )
    texts = text_splitter.split_documents(documents)
    if not texts:
        return None

    # In-memory FAISS is rebuilt per request — fast and needs no persistence.
    db = FAISS.from_documents(texts, get_embeddings())
    return db.as_retriever(search_kwargs={"k": 3})  # top-3 chunks keeps latency low


def _format_answer(response):
    """Render the chain's output dict as Markdown with 100-char source previews."""
    sources = [doc.page_content[:100] + "..." for doc in response['source_documents']]
    return f"**Answer:**\n{response['result']}\n\n**Sources:**\n" + "\n".join(sources)
# 6. BUILD UI
# Title/description corrected to match the models actually used above:
# the LLM is command-a-03-2025 (Command A, not Command R) and the
# embeddings are intfloat/multilingual-e5-large (not E5-Small).
iface = gr.Interface(
    fn=process_and_chat,
    inputs=[
        gr.File(label="Upload Arabic PDF"),
        gr.Textbox(
            label="Ask a question in Arabic",
            placeholder="ما هي النقاط الرئيسية في هذا المستند؟",
        ),
    ],
    outputs=gr.Markdown(),
    title="Arabic RAG (Cohere Command A)",
    description="Upload a PDF and ask questions. Powered by Cohere Command A and Multilingual-E5-Large embeddings.",
)

iface.launch(share=True)