Sam-Oliveira committed
Commit 7ce7422 · Parent(s): 0fb074a

fix triton cache directory

Files changed:
- src/config.py +1 -1
- src/helpers.py +9 -3
- src/summarise.py +7 -3
src/config.py CHANGED

@@ -1,7 +1,7 @@
 from pathlib import Path
 import pathlib,tempfile
 # Root folder for DB
-#PROJ = Path(__file__).parent
+#PROJ = Path(__file__).parent # For MAC
 PROJ = pathlib.Path(tempfile.gettempdir()) # For Space
 MAX_RESULTS = 10 #default number of results
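The config change only touches the comment, but it documents the key decision: PROJ always resolves to the system temp directory, the one location the Space can reliably write to, while the commented-out Path(__file__).parent variant is kept for local (Mac) runs. A minimal sketch of what the live line evaluates to (the exact path is platform-dependent):

import pathlib, tempfile

# Same pattern as the config above: the DB root is placed in the temp
# directory, which is writable inside the Space container.
PROJ = pathlib.Path(tempfile.gettempdir())
print(PROJ)  # typically PosixPath('/tmp') on a Linux-based Space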
src/helpers.py CHANGED

@@ -10,10 +10,16 @@ def render_rows(rows):
     """
     blocks = []
     for t, a, txt, pub in rows:
+        # Handle None values by converting them to empty strings
+        title = html.escape(t) if t is not None else ""
+        authors = html.escape(a) if a is not None else ""
+        summary = html.escape(txt) if txt is not None else ""
+        published = pub[:10] if pub is not None else ""
+
         blocks += [
-            f"<h3>{
-            f"<p><b>Authors:</b> {
-            f"<pre style='white-space:pre-wrap'>{
+            f"<h3>{title}</h3>",
+            f"<p><b>Authors:</b> {authors} <br><i>{published}</i></p>",
+            f"<pre style='white-space:pre-wrap'>{summary}</pre>",
             "<hr>"
         ]
     return "\n".join(blocks) or "<p>No matching papers found.</p>"
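These guards matter because html.escape(None) raises AttributeError ("'NoneType' object has no attribute 'replace'"), so a single row with a missing field previously broke the whole rendered page. A standalone sketch of the guard pattern, using made-up row data in the same (title, authors, abstract, published) shape:

import html

def safe(field):
    # html.escape() expects a str; fall back to "" for missing fields.
    return html.escape(field) if field is not None else ""

# Hypothetical row with a missing authors field.
t, a, txt, pub = ("Attention Is All You Need", None,
                  "The dominant sequence transduction models...",
                  "2017-06-12T00:00:00Z")
print(f"<p><b>Authors:</b> {safe(a)} <br><i>{pub[:10] if pub else ''}</i></p>")
# -> <p><b>Authors:</b>  <br><i>2017-06-12</i></p>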
src/summarise.py CHANGED

@@ -2,7 +2,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from db import get_conn
 from config import MODEL_NAME
 from helpers import rows_by_tag
-
+import os
+import tempfile
+import pathlib
 """
 Summarise the abstract of a paper using a LLM. Further versions should instead summarise the full paper.
 """

@@ -16,12 +18,14 @@ PROMPT = (
 
 # ---------------------------------------------------------------------- #
 def load_pipe():
+    cache_dir = pathlib.Path(tempfile.gettempdir()) / "hf_cache"
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
-
+        cache_dir=cache_dir,
+        load_in_4bit=True,
         device_map="auto"
     )
-    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
+    tok = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=cache_dir)
     tok.pad_token = tok.eos_token
     return pipeline(
         "text-generation",
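Two notes on the new load_pipe(). First, cache_dir redirects the Hugging Face model and tokenizer downloads into a writable tmp path; load_in_4bit=True additionally requires the bitsandbytes package, and recent transformers releases prefer passing quantization_config=BitsAndBytesConfig(load_in_4bit=True) instead. Second, the commit message refers to the Triton cache, which Triton relocates via the TRITON_CACHE_DIR environment variable; the new import os suggests that is set elsewhere in this file, outside the hunks shown. A hedged sketch of that env-var redirect, as an assumption rather than the file's actual code:

import os, pathlib, tempfile

# Assumption: point caches that default to home-directory paths at the
# temp dir before Triton / transformers first write to them.
cache_root = pathlib.Path(tempfile.gettempdir())
os.environ.setdefault("TRITON_CACHE_DIR", str(cache_root / "triton_cache"))
os.environ.setdefault("HF_HOME", str(cache_root / "hf_home"))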