# onenote-clone / app.py
# tiffank1802
# feat: Auto-load notebooks from HF dataset after download
# fea60fa
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
import os
import json
import shutil
import logging
from datetime import datetime
from pathlib import Path
import re
# --- Storage / Hugging Face settings, all overridable from the environment ---
DATASET_PATH = "dataset"
HF_TOKEN = os.environ.get('HF_TOKEN', '')
DATASET_ID = os.environ.get('DATASET_ID', 'ktongue/onenote-data')

# --- Logging (only whether a token is present is logged, never its value) ---
logging.basicConfig(level=logging.INFO)
logging.info(f"HF_TOKEN present: {bool(HF_TOKEN)}")
logging.info(f"DATASET_ID: {DATASET_ID}")

# --- Flask application ---
app = Flask(__name__, static_folder='static')
CORS(app)

# --- Hub client: stays None unless a token longer than 10 characters is set ---
api = None
if len(HF_TOKEN) > 10:
    # Imported lazily so huggingface_hub is only needed when a token is configured.
    from huggingface_hub import login, HfApi
    login(HF_TOKEN)
    api = HfApi()
    logging.info("Logged in to HuggingFace")
def sync_to_hf():
    """Push the local dataset directory to the Hugging Face dataset repo.

    Does nothing when no token/client is configured or when the local
    directory does not exist. Errors are logged and swallowed so that a
    failed sync never breaks the request that triggered it.

    The whole folder is sent with a single ``upload_folder`` call (one
    commit) instead of one ``upload_file`` call, and therefore one commit
    and one round trip, per file.
    """
    if not HF_TOKEN or not api:
        return
    if not Path(DATASET_PATH).is_dir():
        return
    try:
        # NOTE(review): this only adds/updates files; files deleted locally
        # are presumably left in the remote repo - confirm whether
        # `delete_patterns` should be used to propagate deletions.
        api.upload_folder(
            folder_path=DATASET_PATH,
            repo_id=DATASET_ID,
            repo_type='dataset',
            token=HF_TOKEN,
            commit_message="Update notes",
        )
        logging.info("Synced to HF")
    except Exception as e:
        logging.error(f"Sync error: {e}")
def load_from_hf():
    """Download the remote dataset into DATASET_PATH and rebuild the index.

    Skipped entirely when no token/client is configured. Any failure is
    logged and the function returns normally, leaving whatever is already
    on disk in place.
    """
    if not (HF_TOKEN and api):
        return

    try:
        Path(DATASET_PATH).mkdir(exist_ok=True)
        logging.info(f"Downloading dataset from {DATASET_ID}...")
        download_options = {
            "repo_id": DATASET_ID,
            "repo_type": 'dataset',
            "local_dir": DATASET_PATH,
            "token": HF_TOKEN,
        }
        api.snapshot_download(**download_options)
        logging.info("Loaded dataset from HF")
        # Once the download is done, scan the notebooks and add them to the index.
        scan_and_load_notebooks_from_disk()
    except Exception as e:
        logging.error(f"Load error: {e}")
        logging.info("Using local dataset")
def scan_and_load_notebooks_from_disk(dataset_path=None):
    """Rebuild ``notebooks.json`` from the notebook folders found on disk.

    Every sub-directory of ``<dataset>/notebooks`` that contains a readable
    ``notebook.json`` with an ``id`` becomes one index entry. Entries carry
    the same keys that ``FileManager.create_notebook`` writes
    (``id``, ``name``, ``color``, ``folder``).

    Args:
        dataset_path: Root of the dataset. Defaults to the module-level
            ``DATASET_PATH`` when omitted.

    The index file is only rewritten when at least one notebook was found;
    unreadable notebooks are logged and skipped.
    """
    root = Path(DATASET_PATH if dataset_path is None else dataset_path)
    notebooks_dir = root / "notebooks"
    index_file = root / "notebooks.json"

    if not notebooks_dir.exists():
        logging.info("No notebooks directory found in dataset")
        return

    notebooks = []
    try:
        for notebook_folder in sorted(notebooks_dir.iterdir()):
            if not notebook_folder.is_dir():
                continue
            notebook_json = notebook_folder / "notebook.json"
            if not notebook_json.exists():
                continue
            try:
                with open(notebook_json, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                notebook_id = data.get("id")
                if not notebook_id:
                    # An entry without an id could never be looked up again.
                    logging.warning(f"Skipping notebook without id: {notebook_folder.name}")
                    continue
                notebooks.append({
                    "id": notebook_id,
                    "name": data.get("name", notebook_folder.name),
                    "color": data.get("color", "#7719AA"),
                    "folder": notebook_folder.name
                })
                logging.info(f"Found notebook in dataset: {data.get('name')}")
            except Exception as e:
                logging.error(f"Error reading notebook: {e}")
    except Exception as e:
        logging.error(f"Error scanning notebooks directory: {e}")

    # Only touch the index when something was actually found.
    if notebooks:
        index_data = {
            "version": "1.0",
            "last_updated": datetime.now().isoformat(),
            "notebooks": notebooks
        }
        index_file.parent.mkdir(parents=True, exist_ok=True)
        with open(index_file, 'w', encoding='utf-8') as f:
            json.dump(index_data, f, indent=2, ensure_ascii=False)
        logging.info(f"Loaded {len(notebooks)} notebooks from dataset into index")
# Runs once at import time, before the FileManager instance is created.
load_from_hf()
class FileManager:
    """File-backed store for notebooks, sections and pages.

    Layout under ``dataset_path``::

        notebooks.json
        notebooks/<notebook>/notebook.json
        notebooks/<notebook>/sections.json
        notebooks/<notebook>/sections/<section>/section.json
        notebooks/<notebook>/sections/<section>/pages.json
        notebooks/<notebook>/sections/<section>/pages/<page>/page.json
        notebooks/<notebook>/sections/<section>/pages/<page>/content.html
        notebooks/<notebook>/sections/<section>/pages/<page>/drawing.json

    Each ``*.json`` index lists its children as ``{"id", ..., "folder"}``
    entries. Every JSON write is followed by the save hook (by default the
    module-level ``sync_to_hf``).
    """

    # Hook run after every JSON write; None means "call sync_to_hf()".
    _on_save = None

    def __init__(self, dataset_path: str = "dataset", on_save=None):
        """Create the store rooted at ``dataset_path``.

        Args:
            dataset_path: Directory holding the index and the notebooks.
            on_save: Optional zero-argument callable run after each JSON
                write instead of ``sync_to_hf`` (e.g. a no-op for offline use).
        """
        self.dataset_path = Path(dataset_path)
        self.notebooks_dir = self.dataset_path / "notebooks"
        self.index_file = self.dataset_path / "notebooks.json"
        self._on_save = on_save
        self._init_structure()

    def _init_structure(self):
        """Create the root directories and an empty index if they are missing."""
        self.notebooks_dir.mkdir(parents=True, exist_ok=True)
        if not self.index_file.exists():
            self._save_json(self.index_file, {
                "version": "1.0",
                "last_updated": datetime.now().isoformat(),
                "notebooks": []
            })

    def _save_json(self, path: Path, data: dict):
        """Write ``data`` to ``path`` as UTF-8 JSON, then run the save hook."""
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        if self._on_save is not None:
            self._on_save()
        else:
            sync_to_hf()

    def _load_json(self, path: Path) -> dict:
        """Return the parsed JSON at ``path``, or ``{}`` when the file is absent."""
        if not path.exists():
            return {}
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _sanitize_name(self, name: str) -> str:
        """Turn a display name into a safe folder-name fragment.

        Characters ``<>:"/\\|?*`` become ``_``; leading/trailing dots and
        spaces are stripped; an empty result (or a ``None`` name) becomes
        ``"untitled"``.
        """
        text = "" if name is None else str(name)
        sanitized = re.sub(r'[<>:"/\\|?*]', '_', text)
        return sanitized.strip('. ') or "untitled"

    def _generate_id(self) -> str:
        """Return a timestamp id with microsecond resolution."""
        return datetime.now().strftime("%Y%m%d_%H%M%S_%f")

    def _folder_name(self, name: str, item_id: str) -> str:
        """Build a folder name from a display name and the item's full id.

        The full id is used as suffix: its first 8 characters are only the
        date, so a date-only suffix would make two items with the same name
        created on the same day share (and overwrite) one folder.
        """
        return f"{self._sanitize_name(name)}_{item_id}"

    def _notebook_path(self, notebook_id: str):
        """Return the existing folder of ``notebook_id`` per the index, else None."""
        index = self._load_json(self.index_file)
        for nb in index.get("notebooks", []):
            folder = nb.get("folder")
            if nb.get("id") == notebook_id and folder:
                candidate = self.notebooks_dir / folder
                if candidate.exists():
                    return candidate
        return None

    def _find_section_dir(self, section_id: str):
        """Scan every notebook for the section folder with this id, else None."""
        for notebook_folder in self.notebooks_dir.iterdir():
            sections_dir = notebook_folder / "sections"
            if not sections_dir.is_dir():
                continue
            for section_folder in sections_dir.iterdir():
                section_json = section_folder / "section.json"
                if section_json.exists() and self._load_json(section_json).get("id") == section_id:
                    return section_folder
        return None

    def _find_page_dir(self, page_id: str):
        """Scan every section for the page folder with this id, else None."""
        for notebook_folder in self.notebooks_dir.iterdir():
            sections_dir = notebook_folder / "sections"
            if not sections_dir.is_dir():
                continue
            for section_folder in sections_dir.iterdir():
                pages_dir = section_folder / "pages"
                if not pages_dir.is_dir():
                    continue
                for page_folder in pages_dir.iterdir():
                    page_json = page_folder / "page.json"
                    if page_json.exists() and self._load_json(page_json).get("id") == page_id:
                        return page_folder
        return None

    def _read_page(self, page_dir: Path) -> dict:
        """Load a page's metadata plus its ``content`` and ``drawing_data`` text."""
        data = self._load_json(page_dir / "page.json")
        content_path = page_dir / data.get("content_file", "content.html")
        data["content"] = content_path.read_text(encoding='utf-8') if content_path.exists() else ""
        drawing_path = page_dir / data.get("drawing_file", "drawing.json")
        data["drawing_data"] = drawing_path.read_text(encoding='utf-8') if drawing_path.exists() else "[]"
        return data

    def create_notebook(self, name: str, color: str = "#7719AA") -> dict:
        """Create a notebook folder, register it in the index, return its metadata."""
        notebook_id = self._generate_id()
        folder_name = self._folder_name(name, notebook_id)
        notebook_path = self.notebooks_dir / folder_name
        (notebook_path / "sections").mkdir(parents=True, exist_ok=True)
        now = datetime.now().isoformat()
        notebook_data = {
            "id": notebook_id, "name": name, "color": color,
            "folder_name": folder_name,
            "created_at": now,
            "updated_at": now
        }
        self._save_json(notebook_path / "notebook.json", notebook_data)
        self._save_json(notebook_path / "sections.json", {"notebook_id": notebook_id, "sections": []})
        index = self._load_json(self.index_file)
        index.setdefault("notebooks", []).append(
            {"id": notebook_id, "name": name, "color": color, "folder": folder_name}
        )
        index["last_updated"] = datetime.now().isoformat()
        self._save_json(self.index_file, index)
        return notebook_data

    def get_notebook(self, notebook_id: str):
        """Return notebook metadata with its ``sections`` index list, or None."""
        notebook_path = self._notebook_path(notebook_id)
        if notebook_path is None:
            return None
        data = self._load_json(notebook_path / "notebook.json")
        data["sections"] = self._get_sections_list(notebook_path)
        return data

    def get_all_notebooks(self):
        """Return metadata (plus ``section_count``) for every indexed notebook on disk."""
        index = self._load_json(self.index_file)
        notebooks = []
        for nb in index.get("notebooks", []):
            folder = nb.get("folder")
            if not folder:
                continue
            notebook_path = self.notebooks_dir / folder
            if notebook_path.exists():
                data = self._load_json(notebook_path / "notebook.json")
                data["section_count"] = len(self._get_sections_list(notebook_path))
                notebooks.append(data)
        return notebooks

    def delete_notebook(self, notebook_id: str) -> bool:
        """Remove a notebook's folder and index entry. Return False if unknown."""
        index = self._load_json(self.index_file)
        entries = index.get("notebooks", [])
        for i, nb in enumerate(entries):
            if nb.get("id") == notebook_id:
                folder = nb.get("folder")
                if folder:
                    notebook_path = self.notebooks_dir / folder
                    if notebook_path.exists():
                        shutil.rmtree(notebook_path)
                entries.pop(i)
                index["last_updated"] = datetime.now().isoformat()
                self._save_json(self.index_file, index)
                return True
        return False

    def create_section(self, notebook_id: str, name: str):
        """Create a section inside a notebook. Return its metadata, or None
        when the notebook does not exist."""
        notebook_path = self._notebook_path(notebook_id)
        if notebook_path is None:
            return None
        section_id = self._generate_id()
        folder_name = self._folder_name(name, section_id)
        section_path = notebook_path / "sections" / folder_name
        # parents=True: the "sections" directory may be missing on disk.
        (section_path / "pages").mkdir(parents=True, exist_ok=True)
        now = datetime.now().isoformat()
        section_data = {
            "id": section_id, "name": name, "folder_name": folder_name,
            "notebook_id": notebook_id,
            "created_at": now,
            "updated_at": now
        }
        self._save_json(section_path / "section.json", section_data)
        self._save_json(section_path / "pages.json", {"section_id": section_id, "pages": []})
        sections_index = self._load_json(notebook_path / "sections.json")
        sections_index.setdefault("notebook_id", notebook_id)
        sections_index.setdefault("sections", []).append(
            {"id": section_id, "name": name, "folder": folder_name}
        )
        self._save_json(notebook_path / "sections.json", sections_index)
        return section_data

    def get_sections_by_notebook(self, notebook_id: str):
        """Return metadata (plus ``page_count``) for each section of a notebook."""
        notebook_path = self._notebook_path(notebook_id)
        if notebook_path is None:
            return []
        sections_index = self._load_json(notebook_path / "sections.json")
        sections = []
        for sec in sections_index.get("sections", []):
            section_path = notebook_path / "sections" / sec["folder"]
            if section_path.exists():
                data = self._load_json(section_path / "section.json")
                pages_index = self._load_json(section_path / "pages.json")
                data["page_count"] = len(pages_index.get("pages", []))
                sections.append(data)
        return sections

    def delete_section(self, section_id: str) -> bool:
        """Remove a section's folder and its entry in the notebook's
        ``sections.json``. Return False if the section is unknown."""
        section_dir = self._find_section_dir(section_id)
        if section_dir is None:
            return False
        notebook_folder = section_dir.parent.parent
        shutil.rmtree(section_dir)
        sections_file = notebook_folder / "sections.json"
        sections_index = self._load_json(sections_file)
        sections_index["sections"] = [
            s for s in sections_index.get("sections", []) if s.get("id") != section_id
        ]
        self._save_json(sections_file, sections_index)
        return True

    def create_page(self, section_id: str, title: str = "Nouvelle page"):
        """Create a page with empty content/drawing files. Return its
        metadata, or None when the section does not exist."""
        section_path = self._find_section_dir(section_id)
        if section_path is None:
            return None
        page_id = self._generate_id()
        folder_name = self._folder_name(title, page_id)
        page_path = section_path / "pages" / folder_name
        # parents=True: the "pages" directory may be missing on disk.
        page_path.mkdir(parents=True, exist_ok=True)
        now = datetime.now().isoformat()
        page_data = {
            "id": page_id, "title": title, "folder_name": folder_name,
            "section_id": section_id,
            "created_at": now,
            "updated_at": now,
            "content_file": "content.html",
            "drawing_file": "drawing.json"
        }
        # Write the payload files first so the page is complete by the time
        # the first save hook runs.
        (page_path / "content.html").write_text("<div></div>", encoding='utf-8')
        (page_path / "drawing.json").write_text("[]", encoding='utf-8')
        self._save_json(page_path / "page.json", page_data)
        pages_index = self._load_json(section_path / "pages.json")
        pages_index.setdefault("section_id", section_id)
        pages_index.setdefault("pages", []).append(
            {"id": page_id, "title": title, "folder": folder_name}
        )
        self._save_json(section_path / "pages.json", pages_index)
        return page_data

    def get_section(self, section_id: str):
        """Return the section's metadata, or None if no section has this id."""
        section_dir = self._find_section_dir(section_id)
        if section_dir is None:
            return None
        return self._load_json(section_dir / "section.json")

    def get_pages_by_section(self, section_id: str):
        """Return page metadata for a section, most recently updated first."""
        section_path = self._find_section_dir(section_id)
        if section_path is None:
            return []
        pages_index = self._load_json(section_path / "pages.json")
        pages = []
        for page in pages_index.get("pages", []):
            page_path = section_path / "pages" / page["folder"]
            if page_path.exists():
                data = self._load_json(page_path / "page.json")
                data.pop("content", None)
                pages.append(data)
        pages.sort(key=lambda x: x.get("updated_at", ""), reverse=True)
        return pages

    def get_page(self, page_id: str):
        """Return page metadata with ``content`` and ``drawing_data``, or None."""
        page_dir = self._find_page_dir(page_id)
        if page_dir is None:
            return None
        return self._read_page(page_dir)

    def update_page(self, page_id: str, updates: dict):
        """Apply ``title`` / ``content`` / ``drawing_data`` updates to a page.

        Keys that are missing or ``None`` leave the stored value untouched.
        Returns the page as stored after the update (including its current
        ``content`` and ``drawing_data``), or None when the page is unknown.
        """
        page_dir = self._find_page_dir(page_id)
        if page_dir is None:
            return None
        data = self._load_json(page_dir / "page.json")

        title = updates.get("title")
        if title is not None:
            data["title"] = title

        content = updates.get("content")
        if content is not None:
            content_path = page_dir / data.get("content_file", "content.html")
            content_path.write_text(str(content), encoding='utf-8')

        drawing = updates.get("drawing_data")
        if drawing is not None:
            if not isinstance(drawing, str):
                # Accept already-decoded JSON as well as a JSON string.
                drawing = json.dumps(drawing)
            drawing_path = page_dir / data.get("drawing_file", "drawing.json")
            drawing_path.write_text(drawing, encoding='utf-8')

        data["updated_at"] = datetime.now().isoformat()
        self._save_json(page_dir / "page.json", data)

        # Mirror title/timestamp into the section's page index.
        section_path = page_dir.parent.parent
        pages_index = self._load_json(section_path / "pages.json")
        for p in pages_index.get("pages", []):
            if p.get("id") == page_id:
                p["title"] = data.get("title")
                p["updated_at"] = data["updated_at"]
                break
        self._save_json(section_path / "pages.json", pages_index)
        return self._read_page(page_dir)

    def delete_page(self, page_id: str) -> bool:
        """Remove a page's folder and its entry in the section's
        ``pages.json``. Return False if the page is unknown."""
        page_dir = self._find_page_dir(page_id)
        if page_dir is None:
            return False
        section_path = page_dir.parent.parent
        shutil.rmtree(page_dir)
        pages_index = self._load_json(section_path / "pages.json")
        pages_index["pages"] = [
            p for p in pages_index.get("pages", []) if p.get("id") != page_id
        ]
        self._save_json(section_path / "pages.json", pages_index)
        return True

    def _find_notebook_folder(self, notebook_id: str) -> str:
        """Return the index's folder name for ``notebook_id``.

        Raises:
            ValueError: if the notebook is not in the index.
        """
        index = self._load_json(self.index_file)
        for nb in index.get("notebooks", []):
            if nb.get("id") == notebook_id:
                return nb["folder"]
        raise ValueError(f"Notebook {notebook_id} non trouvé")

    def _get_sections_list(self, notebook_path: Path):
        """Return the ``sections`` list from a notebook's ``sections.json``."""
        sections_index = self._load_json(notebook_path / "sections.json")
        return sections_index.get("sections", [])
# Single store instance used by all the API route handlers below.
file_manager = FileManager(DATASET_PATH)
@app.route('/')
def index():
    """Serve ``index.html`` from the ``static`` directory."""
    return send_from_directory('static', 'index.html')
@app.route('/<path:path>')
def static_files(path):
    """Serve the file at ``path`` from the ``static`` directory."""
    return send_from_directory('static', path)
@app.route('/api/notebooks', methods=['GET', 'POST'])
def notebooks():
    """List all notebooks (GET) or create one (POST).

    POST expects a JSON object with optional ``name`` and ``color``; a
    missing, null or empty value falls back to the default. Returns 400
    when the body is valid JSON but not an object, 201 with the new
    notebook otherwise.
    """
    if request.method == 'POST':
        data = request.json
        if not isinstance(data, dict):
            # e.g. a JSON list or null: .get() below would raise otherwise.
            return jsonify({"error": "Expected a JSON object"}), 400
        notebook = file_manager.create_notebook(
            name=str(data.get('name') or 'Nouveau'),
            color=str(data.get('color') or '#7719AA'),
        )
        return jsonify(notebook), 201
    return jsonify(file_manager.get_all_notebooks())
@app.route('/api/notebooks/<notebook_id>', methods=['GET', 'DELETE'])
def notebook_detail(notebook_id):
    """Fetch (GET) or delete (DELETE) one notebook.

    Both methods answer 404 with ``{"error": "Not found"}`` when the
    notebook does not exist; DELETE previously reported success even when
    nothing had been deleted.
    """
    if request.method == 'GET':
        notebook = file_manager.get_notebook(notebook_id)
        if not notebook:
            return jsonify({"error": "Not found"}), 404
        return jsonify(notebook)

    # Only GET and DELETE are routed here, so this is the DELETE branch.
    if not file_manager.delete_notebook(notebook_id):
        return jsonify({"error": "Not found"}), 404
    return jsonify({"success": True})
@app.route('/api/notebooks/<notebook_id>/sections', methods=['GET', 'POST'])
def notebook_sections(notebook_id):
    """List a notebook's sections (GET) or create one in it (POST).

    POST expects a JSON object with an optional ``name``; a missing, null
    or empty name falls back to the default. Returns 400 when the body is
    not a JSON object and 404 when the notebook does not exist.
    """
    if request.method == 'POST':
        data = request.json
        if not isinstance(data, dict):
            # e.g. a JSON list or null: .get() below would raise otherwise.
            return jsonify({"error": "Expected a JSON object"}), 400
        section = file_manager.create_section(
            notebook_id, str(data.get('name') or 'Nouvelle Section')
        )
        if not section:
            return jsonify({"error": "Notebook not found"}), 404
        return jsonify(section), 201
    return jsonify(file_manager.get_sections_by_notebook(notebook_id))
@app.route('/api/sections/<section_id>', methods=['DELETE'])
def section_detail(section_id):
    """Delete one section; 404 when no section has this id.

    The result of ``delete_section`` is checked instead of being ignored,
    so a request for an unknown id no longer reports success.
    """
    if not file_manager.delete_section(section_id):
        return jsonify({"error": "Section not found"}), 404
    return jsonify({"success": True})
@app.route('/api/sections/<section_id>/pages', methods=['GET', 'POST'])
def section_pages(section_id):
    """List a section's pages (GET) or create one in it (POST).

    POST expects a JSON object with an optional ``title``; a missing, null
    or empty title falls back to the default. Returns 400 when the body is
    not a JSON object and 404 when the section does not exist.
    """
    if request.method == 'POST':
        data = request.json
        if not isinstance(data, dict):
            # e.g. a JSON list or null: .get() below would raise otherwise.
            return jsonify({"error": "Expected a JSON object"}), 400
        page = file_manager.create_page(
            section_id, str(data.get('title') or 'Nouvelle Page')
        )
        if not page:
            return jsonify({"error": "Section not found"}), 404
        return jsonify(page), 201
    return jsonify(file_manager.get_pages_by_section(section_id))
@app.route('/api/pages/<page_id>', methods=['GET', 'PUT', 'DELETE'])
def page_detail(page_id):
    """Fetch (GET), update (PUT) or delete (DELETE) one page.

    PUT expects a JSON object; only the ``title``, ``content`` and
    ``drawing_data`` fields that are present and non-null are forwarded,
    so a partial update never passes ``None`` down to the store.
    All three methods answer 404 when the page does not exist.
    """
    if request.method == 'GET':
        page = file_manager.get_page(page_id)
        if not page:
            return jsonify({"error": "Page not found"}), 404
        return jsonify(page)

    if request.method == 'PUT':
        data = request.json
        if not isinstance(data, dict):
            return jsonify({"error": "Expected a JSON object"}), 400
        updates = {
            field: data[field]
            for field in ('title', 'content', 'drawing_data')
            if data.get(field) is not None
        }
        page = file_manager.update_page(page_id, updates)
        if not page:
            return jsonify({"error": "Page not found"}), 404
        return jsonify(page)

    # Only GET, PUT and DELETE are routed here, so this is the DELETE branch.
    if not file_manager.delete_page(page_id):
        return jsonify({"error": "Page not found"}), 404
    return jsonify({"success": True})
if __name__ == '__main__':
    # Bind on all interfaces; the port is read from PORT, defaulting to 7860.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))