Spaces:
Sleeping
Sleeping
Update fn.py
Browse files
fn.py
CHANGED
|
@@ -14,8 +14,11 @@ import numpy as np
|
|
| 14 |
from scipy.spatial.distance import cdist
|
| 15 |
from duckduckgo_search import DDGS
|
| 16 |
from bs4 import BeautifulSoup
|
|
|
|
| 17 |
|
| 18 |
-
model_name = "
|
|
|
|
|
|
|
| 19 |
input_dir = 'input'
|
| 20 |
vectors_dir = 'vectors'
|
| 21 |
|
|
@@ -54,6 +57,13 @@ def bs4(url):
|
|
| 54 |
|
| 55 |
return text
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
def upload(name, filename, content):
|
| 58 |
os.makedirs(f"{input_dir}/{name}", exist_ok=True)
|
| 59 |
srcpath = f"{input_dir}/{name}/{filename}"
|
|
|
|
| 14 |
from scipy.spatial.distance import cdist
|
| 15 |
from duckduckgo_search import DDGS
|
| 16 |
from bs4 import BeautifulSoup
|
| 17 |
+
from markdownify import markdownify
|
| 18 |
|
| 19 |
+
model_name = "cl-nagoya/ruri-large"
|
| 20 |
+
# "mixedbread-ai/mxbai-embed-large-v1"
|
| 21 |
+
# "intfloat/multilingual-e5-large"
|
| 22 |
input_dir = 'input'
|
| 23 |
vectors_dir = 'vectors'
|
| 24 |
|
|
|
|
| 57 |
|
| 58 |
return text
|
| 59 |
|
| 60 |
+
def md(url, timeout=30):
    """Fetch a web page and return its content converted to Markdown.

    Args:
        url: Address of the page to download with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The page's HTML converted by ``markdownify``, with carriage returns
        removed and every run of consecutive newlines collapsed to one.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, timeout=timeout)

    # When the server sends a text/* Content-Type without a charset,
    # requests falls back to ISO-8859-1, which garbles pages that are
    # really UTF-8 / Shift_JIS / EUC-JP. Detect the encoding from the
    # body in that case instead of trusting the fallback.
    if "charset" not in response.headers.get("Content-Type", "").lower():
        response.encoding = response.apparent_encoding

    html = response.text.replace("\r", "")
    mdtxt = markdownify(html)

    # The pattern contains neither anchors nor '.', so no regex flags are
    # needed to collapse blank-line runs.
    return re.sub(r"\n+", "\n", mdtxt)
|
| 66 |
+
|
| 67 |
def upload(name, filename, content):
|
| 68 |
os.makedirs(f"{input_dir}/{name}", exist_ok=True)
|
| 69 |
srcpath = f"{input_dir}/{name}/{filename}"
|