# Ulimi AI Streamlit entry point.
#
# Sets up the page, makes sure the PDF knowledge base is present in DOCS_DIR
# (downloading it from the Hugging Face Hub when it is not), loads every PDF
# found there with PyPDFLoader, and reports how many documents were loaded.
import os

import streamlit as st
from huggingface_hub import snapshot_download
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Page configuration is issued before any other Streamlit call.
st.set_page_config(page_title="Ulimi AI", layout="wide")
st.title("🌽 Ulimi AI – Agricultural Intelligence for Malawi")

# Local directory that holds the downloaded knowledge-base PDFs.
DOCS_DIR = "docs"


def _list_pdf_paths(directory):
    """Return the sorted paths of PDF files located directly in *directory*.

    Args:
        directory: Path of the directory to scan (not scanned recursively).

    Returns:
        A list of file paths whose names end in ``.pdf`` (case-insensitive).
        The list is empty when *directory* does not exist or holds no PDFs.
    """
    if not os.path.isdir(directory):
        return []
    # Sort so documents are loaded in the same order on every run;
    # os.listdir() makes no ordering guarantee.
    candidates = (
        os.path.join(directory, name) for name in sorted(os.listdir(directory))
    )
    return [
        path
        for path in candidates
        if path.lower().endswith(".pdf") and os.path.isfile(path)
    ]


# Download when no PDF is available yet. Checking for actual PDFs (rather than
# just the directory) also recovers from an empty or half-populated DOCS_DIR,
# e.g. after an interrupted first run.
if not _list_pdf_paths(DOCS_DIR):
    with st.spinner("Downloading knowledge base..."):
        snapshot_download(
            repo_id="MicohEscobar/Ulimi-AI-Docs",
            repo_type="dataset",
            local_dir=DOCS_DIR,
        )

# NOTE(review): Streamlit re-executes this script on every user interaction,
# so the PDFs are re-parsed each time. Consider wrapping the loading step in a
# cached function (st.cache_resource / st.cache_data) if the installed
# Streamlit version provides it.
pdfs = []
for pdf_path in _list_pdf_paths(DOCS_DIR):
    try:
        pdfs.extend(PyPDFLoader(pdf_path).load())
    except Exception as exc:  # noqa: BLE001 - UI boundary: report and keep going
        # A single unreadable file should not take the whole app down.
        st.warning(f"Skipped {os.path.basename(pdf_path)}: {exc}")

if pdfs:
    st.success(f"Loaded {len(pdfs)} document pages")
    st.write("RAG pipeline ready. Embeddings + QA next.")
else:
    # Avoid announcing a ready pipeline when there is nothing to query.
    st.warning("No PDF pages could be loaded from the knowledge base.")