AbdulElahGwaith's picture
Upload folder using huggingface_hub
88df9e4 verified
import type { Response, NextFunction } from 'express'
import { defaultCacheControl } from '@/frame/middleware/cache-control'
import { ExtendedRequest } from '@/types'
// We'll check if the current request path is one of these, or ends with
// one of these.
// These are clearly intentional "guesses" made by some sort of
// pen-testing bot.
const JUNK_STARTS = ['///', '/\\', '/\\.']
const JUNK_ENDS = [
'/package.json',
'/package-lock.json',
'/etc/passwd',
'/Gemfile',
'/Gemfile.lock',
'/WEB-INF/web.xml',
'/WEB-INF/web.xml%C0%80.jsp',
]
const JUNK_PATHS = new Set([
...JUNK_ENDS,
'/env',
'/xmlrpc.php',
'/wp-login.php',
'/README.md',
'/server.js',
'/.git',
'/_next',
])
// Basename is the last token of the path when split by `/`.
// For example `/foo/bar/baz` has a basename of `baz`.
const JUNK_BASENAMES = new Set([
// E.g. /en/code-security/.env
'.env',
])
function isJunkPath(path: string) {
if (JUNK_PATHS.has(path)) return true
for (const junkPath of JUNK_STARTS) {
if (path.startsWith(junkPath)) {
return true
}
}
for (const junkPath of JUNK_ENDS) {
if (path.endsWith(junkPath)) {
return true
}
}
const basename = path.split('/').pop()
// E.g. `/billing/.env.local` or `/billing/.env_sample`
if (basename && /^\.env(.|_)[\w.]+/.test(basename)) return true
if (basename && JUNK_BASENAMES.has(basename)) return true
// Prevent various malicious injection attacks targeting Next.js
if (path.match(/^\/_next[^/]/) || path === '/_next/data' || path === '/_next/data/') {
return true
}
// We currently don't use next/image for any images.
// This could change in the future but right now can just 404 on these
// so we don't have to deal with any other errors.
if (path.startsWith('/_next/image')) {
return true
}
return false
}
export default function handleInvalidPaths(
req: ExtendedRequest,
res: Response,
next: NextFunction,
) {
if (isJunkPath(req.path)) {
// We can all the CDN to cache these responses because they're
// they're not going to suddenly work in the next deployment.
defaultCacheControl(res)
res.setHeader('content-type', 'text/plain')
res.status(404).send('Not found')
return
}
if (req.path.endsWith('/index.md')) {
defaultCacheControl(res)
// The originalUrl is the full URL including query string.
// E.g. `/en/foo.md?bar=baz`
const newUrl = req.originalUrl.replace(req.path, req.path.replace(/\/index\.md$/, ''))
return res.redirect(newUrl)
} else if (req.path.endsWith('.md')) {
// encode the query params but also make them pretty so we can see
// them as `/` and `@` in the address bar
// e.g. /api/article/body?pathname=/en/enterprise-server@3.16/admin...
// NOT: /api/article/body?pathname=%2Fen%2Fenterprise-server%403.16%2Fadmin...
const encodedPath = encodeURIComponent(req.path.replace(/\.md$/, ''))
.replace(/%2F/g, '/')
.replace(/%40/g, '@')
const newUrl = `/api/article/body?pathname=${encodedPath}`
res.redirect(newUrl)
return
}
return next()
}