Ulimi-AI / app.py
denis
Initial Ulimi AI space with external PDF dataset
e39e1b7
raw
history blame contribute delete
854 Bytes
import os
import streamlit as st
from huggingface_hub import snapshot_download
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Local folder that mirrors the knowledge-base dataset from the Hugging Face Hub.
DOCS_DIR = "docs"


def find_pdf_files(directory: str) -> list:
    """Return the sorted paths of every PDF found under *directory*.

    The search is recursive and the extension match is case-insensitive, so
    files in subfolders and files named ``*.PDF`` are included. A missing
    directory yields an empty list instead of raising.
    """
    return sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        if names
        for name in names
        if name.lower().endswith(".pdf")
    )


def load_pdf_pages(pdf_paths) -> tuple:
    """Load every page of the given PDFs.

    Args:
        pdf_paths: iterable of PDF file paths.

    Returns:
        ``(pages, failures)`` where ``pages`` is the combined list of documents
        returned by ``PyPDFLoader.load()`` and ``failures`` is a list of
        ``(path, exception)`` pairs for files that could not be read.
    """
    pages = []
    failures = []
    for path in pdf_paths:
        try:
            pages.extend(PyPDFLoader(path).load())
        except Exception as exc:  # one unreadable file must not sink the app
            failures.append((path, exc))
    return pages, failures


def main() -> None:
    """Render the page, fetch the knowledge base if needed, and load its PDFs."""
    # Must be the first Streamlit command issued by the script.
    st.set_page_config(page_title="Ulimi AI", layout="wide")
    st.title("🌽 Ulimi AI – Agricultural Intelligence for Malawi")

    # Download when no PDFs are present, not merely when the folder is missing:
    # an empty or partially created folder would otherwise block the download
    # forever.
    if not find_pdf_files(DOCS_DIR):
        with st.spinner("Downloading knowledge base..."):
            snapshot_download(
                repo_id="MicohEscobar/Ulimi-AI-Docs",
                repo_type="dataset",
                local_dir=DOCS_DIR,
            )

    # NOTE(review): Streamlit re-executes the script on every rerun, so the
    # PDFs are parsed again each time; consider caching the loaded pages once
    # interactive widgets are added.
    pdfs, failures = load_pdf_pages(find_pdf_files(DOCS_DIR))
    for path, exc in failures:
        st.warning(f"Skipped unreadable PDF {path}: {exc}")

    if not pdfs:
        # Do not claim the pipeline is ready when there is nothing to query.
        st.error("No PDF pages could be loaded from the knowledge base.")
        return

    st.success(f"Loaded {len(pdfs)} document pages")
    st.write("RAG pipeline ready. Embeddings + QA next.")


if __name__ == "__main__":
    main()