AbdulElahGwaith's picture
Upload folder using huggingface_hub
88df9e4 verified
import type { RequestHandler, Response } from 'express'
import express from 'express'
import { defaultCacheControl } from '@/frame/middleware/cache-control'
import catchMiddlewareError from '@/observability/middleware/catch-middleware-error'
import { ExtendedRequestWithPageInfo } from '../types'
import {
pageValidationMiddleware,
pathValidationMiddleware,
apiVersionValidationMiddleware,
} from './validation'
import { getArticleBody } from './article-body'
import { getMetadata } from './article-pageinfo'
import {
makeLanguageSurrogateKey,
setFastlySurrogateKey,
SURROGATE_ENUMS,
} from '@/frame/middleware/set-fastly-surrogate-key'
import statsd from '@/observability/lib/statsd'
// Express router for the article API. The three GET routes below ('/', '/body'
// and '/meta') are registered on it, and it is this module's default export.
const router = express.Router()
// For all these routes in `/api/article`:
// - pathValidationMiddleware ensures the path is properly structured and handles errors when it's not
// - pageValidationMiddleware fetches the page from the pagelist, returns 404 to the user if not found
/**
* Get article metadata and content in a single object. Equivalent to calling `/article/meta` concatenated with `/article/body`.
* @route GET /api/article
* @param {string} pathname - Article path (e.g. '/en/get-started/article-name')
* @param {string} [apiVersion] - API version for REST pages (optional, defaults to latest)
* @returns {object} JSON object with article metadata and content (`meta` and `body` keys)
* @throws {Error} 403 - If the article body cannot be retrieved. Reason is given in the error message.
* @throws {Error} 400 - If pathname or apiVersion parameters are invalid.
* @throws {Error} 404 - If the path is valid, but the page couldn't be resolved.
* @example
* ❯ curl -s "https://docs.github.com/api/article?pathname=/en/get-started/start-your-journey/about-github-and-git"
* {
* "meta": {
* "title": "About GitHub and Git",
* "intro": "You can use GitHub and Git to collaborate on work.",
* "product": "Get started"
* },
* "body": "## About GitHub\n\nGitHub is a cloud-based platform where you can store, share, and work together with others to write code.\n\nStoring your code in a \"repository\" on GitHub allows you to:\n\n* **Showcase or share** your work.\n [...]"
* }
*/
router.get(
  '/',
  pathValidationMiddleware as RequestHandler,
  pageValidationMiddleware as RequestHandler,
  apiVersionValidationMiddleware as RequestHandler,
  catchMiddlewareError(async function (req: ExtendedRequestWithPageInfo, res: Response) {
    // Metadata is resolved first; its cache status is reported with the metric below.
    const metadataResult = await getMetadata(req)

    let articleBody
    try {
      articleBody = await getArticleBody(req)
    } catch (error) {
      // A body that cannot be retrieved is answered with a 403 carrying the
      // reason; no lookup metric or cache header is emitted in that case.
      const reason = (error as Error).message
      res.status(403)
      return res.json({ error: reason })
    }

    incrementArticleLookup(req, 'full', metadataResult.cacheInfo)
    defaultCacheControl(res)

    const payload = { meta: metadataResult.meta, body: articleBody }
    return res.json(payload)
  }),
)
/**
* Get the contents of an article's body.
* @route GET /api/article/body
* @param {string} pathname - Article path (e.g. '/en/get-started/article-name')
* @param {string} [apiVersion] - API version (optional, defaults to latest)
* @returns {string} Article body content in markdown format.
* @throws {Error} 403 - If the article body cannot be retrieved. Reason is given in the error message.
* @throws {Error} 400 - If pathname or apiVersion parameters are invalid.
* @throws {Error} 404 - If the path is valid, but the page couldn't be resolved.
* @example
* ❯ curl -s https://docs.github.com/api/article/body\?pathname=/en/get-started/start-your-journey/about-github-and-git
* ## About GitHub
*
* GitHub is a cloud-based platform where you can store, share, and work together with others to write code.
*
* Storing your code in a "repository" on GitHub allows you to:
* [...]
*/
router.get(
  '/body',
  pathValidationMiddleware as RequestHandler,
  pageValidationMiddleware as RequestHandler,
  apiVersionValidationMiddleware as RequestHandler,
  catchMiddlewareError(async function (req: ExtendedRequestWithPageInfo, res: Response) {
    let markdown
    try {
      markdown = await getArticleBody(req)
    } catch (error) {
      // Failure to retrieve the body is reported as a JSON 403 with the reason.
      const reason = (error as Error).message
      res.status(403)
      return res.json({ error: reason })
    }

    // Only successful lookups are counted and given the default cache headers.
    incrementArticleLookup(req, 'body')
    defaultCacheControl(res)

    // The body is served as markdown text rather than JSON.
    res.type('text/markdown')
    return res.send(markdown)
  }),
)
/**
* Get metadata about an article.
* @route GET /api/article/meta
* @param {string} pathname - Article path (e.g. '/en/get-started/article-name')
* @returns {object} JSON object containing article metadata with title, intro, and product information.
* @throws {Error} 400 - If pathname parameter is invalid.
* @throws {Error} 404 - If the path is valid, but the page couldn't be resolved.
* @example
* ❯ curl -s "https://docs.github.com/api/article/meta?pathname=/en/get-started/start-your-journey/about-github-and-git"
* {
* "title": "About GitHub and Git",
* "intro": "You can use GitHub and Git to collaborate on work.",
* "product": "Get started",
* "breadcrumbs": [
* {
* "href": "/en/get-started",
* "title": "Get started"
* },
* {
* "href": "/en/get-started/start-your-journey",
* "title": "Start your journey"
* },
* {
* "href": "/en/get-started/start-your-journey/about-github-and-git",
* "title": "About GitHub and Git"
* }
* ]
* }
*/
router.get(
  '/meta',
  pathValidationMiddleware as RequestHandler,
  pageValidationMiddleware as RequestHandler,
  catchMiddlewareError(async function pageInfo(req: ExtendedRequestWithPageInfo, res: Response) {
    const metadataResult = await getMetadata(req)
    incrementArticleLookup(req, 'meta', metadataResult.cacheInfo)
    defaultCacheControl(res)

    // The `Surrogate-Key` header must carry the language surrogate key of the
    // requested page. By default it is derived from the pathname, but
    // `/api/**` URLs have no language of their own (other than the default
    // 'en'). Setting it explicitly keeps these URLs cached in Fastly per
    // language, which the staggered purge relies on.
    const pageLanguage = req.pageinfo?.page?.languageCode || 'en'
    const surrogateKey = `${SURROGATE_ENUMS.DEFAULT} ${makeLanguageSurrogateKey(pageLanguage)}`
    setFastlySurrogateKey(res, surrogateKey, true)

    return res.json(metadataResult.meta)
  }),
)
// this helps us standardize calls to our datadog agent for article api purposes
/**
 * Standardizes calls to our Datadog agent for article API purposes by
 * incrementing the `api.article.lookup` counter once, tagged with the page's
 * pathname and language, the endpoint type and the source of the request.
 *
 * Building the tags never throws because of a malformed `Referer` header, so
 * a bad header cannot fail a request that was otherwise served successfully.
 *
 * @param req - Request whose `pageinfo` and headers supply the tag values.
 * @param type - Which article endpoint handled the request.
 * @param cacheInfo - Optional cache status; when truthy it is added as a
 *   `cache:` tag. The route handlers pass the value returned by `getMetadata`.
 */
function incrementArticleLookup(
  req: ExtendedRequestWithPageInfo,
  type: 'full' | 'body' | 'meta',
  cacheInfo?: string,
) {
  const pathname = req.pageinfo.pathname
  const language = req.pageinfo.page?.languageCode || 'en'

  // Requests for hovercards carry the X-Request-Source header,
  // see src/links/components/LinkPreviewPopover.tsx
  const source = resolveLookupSource(req.get('X-Request-Source'), req.get('Referer'))

  const tags = [
    // According to https://docs.datadoghq.com/getting_started/tagging/#define-tags
    // the max length of a tag is 200 characters. Most of ours are less than
    // that but we truncate just to be safe.
    `pathname:${pathname}`.slice(0, 200),
    `language:${language}`,
    `type:${type}`,
    `source:${source}`.slice(0, 200),
  ]

  // Only added when the caller supplies a cache status.
  if (cacheInfo) tags.push(`cache:${cacheInfo}`)

  statsd.increment('api.article.lookup', 1, tags)
}

/**
 * Derives the `source` tag value from the request headers.
 *
 * @param requestSource - Value of the `X-Request-Source` header, if any.
 * @param referer - Value of the `Referer` header, if any.
 * @returns `requestSource` when set; otherwise `external-<hostname>` for a
 *   parseable referer, `external-unknown` for a referer that has no hostname
 *   or is not a valid absolute URL, and `external` when there is no referer.
 */
function resolveLookupSource(
  requestSource: string | undefined,
  referer: string | undefined,
): string {
  if (requestSource) return requestSource
  if (!referer) return 'external'

  let hostname = ''
  try {
    hostname = new URL(referer).hostname
  } catch {
    // `new URL()` throws a TypeError for anything that is not a valid absolute
    // URL. The header is client-controlled, so fall through to 'unknown'.
  }
  return `external-${hostname || 'unknown'}`
}
// Expose the router, with its three article routes registered, as the default export.
export default router