"""Arabic PDF question-answering (RAG) Gradio app.

Pipeline: PDF -> chunked text -> FAISS vector store (multilingual-e5-large
embeddings) -> RetrievalQA over Cohere Command A, answering in Arabic.
"""

import os

import gradio as gr
import spaces
import torch
from langchain_classic.chains import RetrievalQA
from langchain_cohere import ChatCohere
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. SETUP: DEFINING THE MODELS
# Multilingual embedding model used for retrieval (handles Arabic).
embedding_model_name = "intfloat/multilingual-e5-large"

# Detect device - use CUDA if available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Lazily-initialized embeddings singleton; created on first use so the model
# loads on the GPU when one is available via ZeroGPU.
embeddings = None


def get_embeddings():
    """Return the process-wide HuggingFaceEmbeddings, creating it on first call."""
    global embeddings
    if embeddings is None:
        embeddings = HuggingFaceEmbeddings(
            model_name=embedding_model_name,
            model_kwargs={"device": device},
        )
    return embeddings


@spaces.GPU(duration=120)
def process_and_chat(file, query):
    """Answer *query* about the uploaded PDF *file* via RAG.

    Parameters
    ----------
    file : str | object
        Path to the PDF (Gradio 6.x passes a plain string; older versions
        pass an object exposing ``.name``).
    query : str
        The user's question (expected in Arabic).

    Returns
    -------
    str
        Markdown with the answer and truncated source snippets, or a
        human-readable error message.
    """
    try:
        if not file:
            return "Please upload a PDF file first."
        if not query:
            return "Please enter a question."

        # Fail fast on a missing API key before any expensive work.
        api_key = os.environ.get("COHERE_API_KEY")
        if not api_key:
            return "**Error:** COHERE_API_KEY environment variable is not set."

        # Initialize LLM (Cohere Command A).
        llm = ChatCohere(
            model="command-a-03-2025", temperature=0.3, cohere_api_key=api_key
        )

        # 2. LOAD & PROCESS DOCUMENT
        file_path = file if isinstance(file, str) else file.name
        loader = PyPDFLoader(file_path)
        documents = loader.load()

        # Split text into chunks (Arabic text needs careful splitting).
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separators=["\n\n", "\n", "。", ".", " ", ""],
        )
        texts = text_splitter.split_documents(documents)

        # 3. CREATE VECTOR STORE (in-memory FAISS - faster than Chroma).
        # NOTE(review): E5 models are trained with "query: "/"passage: "
        # prefixes; retrieval quality may improve if added — TODO confirm.
        db = FAISS.from_documents(texts, get_embeddings())
        retriever = db.as_retriever(search_kwargs={"k": 3})  # top-3 chunks

        # 4. RAG CHAIN
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

        # 5. GENERATE ANSWER - instruct the model to answer in Arabic and
        # stay grounded in the retrieved context.
        augmented_query = (
            "Answer the following question in Arabic based ONLY on the "
            "context provided. If you don't know, say you don't know. "
            f"Question: {query}"
        )
        # RetrievalQA's documented input key is "query"; pass it explicitly
        # rather than relying on implicit string-to-dict coercion.
        response = qa_chain.invoke({"query": augmented_query})

        # Format truncated source snippets for display under the answer.
        sources = [
            doc.page_content[:100] + "..." for doc in response["source_documents"]
        ]
        return f"**Answer:**\n{response['result']}\n\n**Sources:**\n" + "\n".join(
            sources
        )
    except Exception as e:
        # Surface any failure (bad PDF, network, quota) in the UI instead of
        # crashing the Gradio worker.
        return f"**Error:** {str(e)}"


# 6. BUILD UI
iface = gr.Interface(
    fn=process_and_chat,
    inputs=[
        gr.File(label="Upload Arabic PDF"),
        gr.Textbox(
            label="Ask a question in Arabic",
            placeholder="ما هي النقاط الرئيسية في هذا المستند؟",
        ),
    ],
    outputs=gr.Markdown(),
    # Fixed labels: the configured models are Command A (command-a-03-2025)
    # and multilingual-e5-large, not "Command R" / "E5-Small".
    title="Arabic RAG (Command A)",
    description=(
        "Upload a PDF and ask questions. Powered by Cohere Command A and "
        "Multilingual-E5-Large embeddings."
    ),
)

iface.launch(share=True)