denis committed on
Commit
e39e1b7
·
1 Parent(s): a02e387

Initial Ulimi AI space with external PDF dataset

Browse files
Files changed (2) hide show
  1. app.py +28 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Ulimi AI Streamlit app.

Downloads the PDF knowledge base from the Hugging Face Hub when no local
copy is present, then loads every PDF page into memory and reports how
many pages were read.
"""

import os

import streamlit as st
from huggingface_hub import snapshot_download

try:
    # Preferred import path; `langchain-community` is listed in requirements.txt.
    from langchain_community.document_loaders import PyPDFLoader
except ImportError:  # fall back to the legacy re-export used originally
    from langchain.document_loaders import PyPDFLoader

try:
    # Not used yet; kept for the upcoming chunking/embedding step.
    from langchain_text_splitters import RecursiveCharacterTextSplitter  # noqa: F401
except ImportError:  # fall back to the legacy location used originally
    from langchain.text_splitter import RecursiveCharacterTextSplitter  # noqa: F401

st.set_page_config(page_title="Ulimi AI", layout="wide")
st.title("🌽 Ulimi AI – Agricultural Intelligence for Malawi")

DOCS_DIR = "docs"


def _find_pdfs(root):
    """Return the sorted paths of all PDF files found anywhere under *root*.

    The search is recursive and the extension match is case-insensitive, so
    files in subfolders and files named ``*.PDF`` are picked up as well.
    A missing *root* simply yields an empty list.
    """
    found = []
    for dirpath, _dirnames, filenames in os.walk(root):
        found.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.lower().endswith(".pdf")
        )
    return sorted(found)


@st.cache_resource(show_spinner=False)
def _load_pages(pdf_paths):
    """Parse each PDF in *pdf_paths* and return ``(pages, failures)``.

    *pdf_paths* must be a tuple so Streamlit can hash it as the cache key.
    Caching avoids re-parsing every PDF each time the script reruns.
    ``failures`` is a list of ``(path, error message)`` pairs for files that
    could not be read.
    """
    pages = []
    failures = []
    for path in pdf_paths:
        try:
            pages.extend(PyPDFLoader(path).load())
        except Exception as exc:  # one unreadable PDF must not take down the app
            failures.append((path, str(exc)))
    return pages, failures


# Check for actual PDFs rather than just the directory: an empty or partially
# populated folder left by an interrupted download should trigger a retry.
if not _find_pdfs(DOCS_DIR):
    with st.spinner("Downloading knowledge base..."):
        snapshot_download(
            repo_id="MicohEscobar/Ulimi-AI-Docs",
            repo_type="dataset",
            local_dir=DOCS_DIR,
        )

pdfs, failed = _load_pages(tuple(_find_pdfs(DOCS_DIR)))

for failed_path, reason in failed:
    st.warning(f"Skipped unreadable PDF {failed_path}: {reason}")

if pdfs:
    st.success(f"Loaded {len(pdfs)} document pages")
    st.write("RAG pipeline ready. Embeddings + QA next.")
else:
    # Do not claim the pipeline is ready when there is nothing to index.
    st.error(f"No readable PDF pages were found in '{DOCS_DIR}'.")
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ langchain
3
+ langchain-community
4
+ pypdf
5
+ sentence-transformers
6
+ faiss-cpu
7
+ huggingface_hub