api_search_articles / app /database.py
Loren's picture
Upload 6 files
dd0f502 verified
raw
history blame
1.76 kB
import sqlite3
from typing import List, Dict
import os
from huggingface_hub import hf_hub_download
# Télécharger le fichier SQLite depuis le dataset
# Créer un dossier temporaire pour le cache
# Répertoire writable dans le Space
cache_dir = "/tmp"
os.makedirs(cache_dir, exist_ok=True)
REPO_ID = "Loren/articles_db" # dataset HF
DB_NAME = 'articles.db'
hf_token = os.environ["API_HF_TOKEN"]
sqlite_path = hf_hub_download(
repo_id=REPO_ID,
filename=DB_NAME,
repo_type="dataset",
token=hf_token,
cache_dir=cache_dir
)
def get_connection(sqlite_path):
conn = sqlite3.connect(sqlite_path)
conn.row_factory = sqlite3.Row
return conn
def fetch_tags() -> List[str]:
"""Retourne tous les tags"""
conn = get_connection()
cur = conn.cursor()
cur.execute("SELECT tag_name FROM tags ORDER BY tag_name")
tags = [row["tag_name"] for row in cur.fetchall()]
conn.close()
return tags
def fetch_articles_by_tags(tags: List[str]) -> List[Dict]:
"""
Retourne les articles correspondant aux tags.
"""
if not tags:
return []
conn = get_connection()
conn.row_factory = sqlite3.Row
cur = conn.cursor()
# Créer la liste de placeholders "?" dynamiquement
placeholders = ",".join(["?"] * len(tags))
query = ("""SELECT a.article_id, a.article_title, a.article_url
FROM tags t, articles a, tag_article ta
WHERE ta.tag_id = t.tag_id
AND ta.article_id = a.article_id
AND t.tag_name IN (""" + placeholders + """)"""
)
cur.execute(query, tags)
results = [dict(row) for row in cur.fetchall()]
conn.close()
return results