Update build tooling, update imports, require Node 16 (#3970)

This commit is contained in:
Felix Boehm
2024-08-06 00:02:58 +01:00
committed by GitHub
parent e6c0988a44
commit caab0694ff
35 changed files with 2582 additions and 3773 deletions

View File

@@ -95,7 +95,7 @@
],
"parserOptions": {
"sourceType": "module",
"project": "./tsconfig.eslint.json"
"project": "./tsconfig.json"
},
"rules": {
"dot-notation": 0,
@@ -126,7 +126,7 @@
},
{
"files": "*.spec.ts",
"extends": "plugin:jest/recommended",
"extends": "plugin:vitest/legacy-recommended",
"rules": {
"@typescript-eslint/no-explicit-any": 0
}

View File

@@ -44,12 +44,12 @@ jobs:
- name: Install npm dependencies
run: npm ci
- name: Run Jest
run: npm run test:jest
- name: Run tests
run: npm run test:vi
if: matrix.node != env.NODE_COV
- name: Run Jest with coverage
run: npm run test:jest:cov
- name: Run tests with coverage
run: npm run test:vi -- --coverage
if: matrix.node == env.NODE_COV
- name: Run Coveralls

3
.gitignore vendored
View File

@@ -4,6 +4,7 @@ npm-debug.log
.docusaurus
.cache-loader
/coverage
/lib
/.tshy
/dist
/website/docs/api
/website/build

View File

@@ -1,83 +1,114 @@
import Suites from './suite'; // eslint-disable-line n/file-extension-in-import
import type { Cheerio } from '../src/cheerio.js';
import fs from 'node:fs/promises';
import { Script } from 'node:vm';
import { Bench } from 'tinybench';
import type { Element } from 'domhandler';
import type { Cheerio } from '../src/cheerio.js';
import type { CheerioAPI } from '../src/load.js';
import { JSDOM } from 'jsdom';
import { load } from '../src/index-browser.js';
const suites = new Suites();
const documentDir = new URL('documents/', import.meta.url);
const jQuerySrc = await fs.readFile(
new URL('../node_modules/jquery/dist/jquery.slim.js', import.meta.url),
'utf8',
);
const jQueryScript = new Script(jQuerySrc);
// Position of the value that follows `--filter`. `indexOf` returns -1 when the
// flag is absent, so this is 0 in that case.
const filterIndex = process.argv.indexOf('--filter') + 1;
// Substring a benchmark name has to contain in order to run. Falls back to ''
// (which every name contains) when `--filter` is missing or is the last
// argument and therefore has no value.
const benchmarkFilter =
  filterIndex > 0 ? (process.argv[filterIndex] ?? '') : '';
const regexIdx = process.argv.indexOf('--regex') + 1;
if (regexIdx > 0) {
if (regexIdx === process.argv.length) {
throw new Error('Error: the "--regex" option requires a value');
const cheerioOnly = process.argv.includes('--cheerio-only');
/** Callbacks that describe one benchmark case; `T` is the type of the prepared data. */
interface SuiteOptions<T> {
/** Body that gets measured; receives the query function and the value `setup` returned. */
test($: CheerioAPI, data: T): void;
/** Called before the case is added to the bench; its return value is handed to `test`. */
setup($: CheerioAPI): T;
}
async function benchmark<T>(
name: string,
fileName: string,
options: SuiteOptions<T>,
): Promise<void> {
if (!name.includes(benchmarkFilter)) {
return;
}
suites.filter(process.argv[regexIdx]);
}
if (process.argv.includes('--cheerio-only')) {
suites.cheerioOnly();
const markup = await fs.readFile(new URL(fileName, documentDir), 'utf8');
console.log(`Test: ${name} (file: ${fileName})`);
const bench = new Bench();
const { test, setup } = options;
// Add Cheerio test
const $ = load(markup);
const setupData: T = setup($);
bench.add('cheerio', () => {
test($, setupData);
});
// Add JSDOM test
if (!cheerioOnly) {
const dom = new JSDOM(markup, { runScripts: 'outside-only' });
jQueryScript.runInContext(dom.getInternalVMContext());
const setupData: T = setup(dom.window['$']);
bench.add('jsdom', () => test(dom.window['$'], setupData));
}
await bench.warmup(); // Make results more reliable, ref: https://github.com/tinylibs/tinybench/pull/50
await bench.run();
console.table(bench.table());
}
suites.add<void>('Select all', 'jquery.html', {
setup() {
return;
},
test($) {
return $('*').length;
},
await benchmark<void>('Select all', 'jquery.html', {
setup() {},
test: ($) => $('*').length,
});
suites.add<void>('Select some', 'jquery.html', {
setup() {
return;
},
test($) {
return $('li').length;
},
await benchmark<void>('Select some', 'jquery.html', {
setup() {},
test: ($) => $('li').length,
});
/*
* Manipulation Tests
*/
suites.add<Cheerio<Element>>('manipulation - append', 'jquery.html', {
setup($) {
return $('body');
},
test(_, $body) {
$body.append('<div>'.repeat(50));
},
const DIVS_MARKUP = '<div>'.repeat(50);
await benchmark<Cheerio<Element>>('manipulation - append', 'jquery.html', {
setup: ($) => $('body'),
test: (_, $body) => $body.append(DIVS_MARKUP),
});
// These tests run out of memory in jsdom
suites.add<Cheerio<Element>>(
// JSDOM used to run out of memory on these tests
await benchmark<Cheerio<Element>>(
'manipulation - prepend - highmem',
'jquery.html',
{
setup($) {
return $('body');
},
test(_, $body) {
$body.prepend('<div>'.repeat(50));
},
setup: ($) => $('body'),
test: (_, $body) => $body.prepend(DIVS_MARKUP),
},
);
suites.add<Cheerio<Element>>('manipulation - after - highmem', 'jquery.html', {
setup($) {
return $('body');
await benchmark<Cheerio<Element>>(
'manipulation - after - highmem',
'jquery.html',
{
setup: ($) => $('body'),
test: (_, $body) => $body.after(DIVS_MARKUP),
},
test(_, $body) {
$body.after('<div>'.repeat(50));
);
await benchmark<Cheerio<Element>>(
'manipulation - before - highmem',
'jquery.html',
{
setup: ($) => $('body'),
test: (_, $body) => $body.before(DIVS_MARKUP),
},
});
suites.add<Cheerio<Element>>('manipulation - before - highmem', 'jquery.html', {
setup($) {
return $('body');
},
test(_, $body) {
$body.before('<div>'.repeat(50));
},
});
);
suites.add<Cheerio<Element>>('manipulation - remove', 'jquery.html', {
setup($) {
return $('body');
},
await benchmark<Cheerio<Element>>('manipulation - remove', 'jquery.html', {
setup: ($) => $('body'),
test($, $lis) {
const child = $('<div>');
$lis.append(child);
@@ -85,7 +116,7 @@ suites.add<Cheerio<Element>>('manipulation - remove', 'jquery.html', {
},
});
suites.add<void>('manipulation - replaceWith', 'jquery.html', {
await benchmark<void>('manipulation - replaceWith', 'jquery.html', {
setup($) {
$('body').append('<div id="foo">');
},
@@ -94,50 +125,34 @@ suites.add<void>('manipulation - replaceWith', 'jquery.html', {
},
});
suites.add<Cheerio<Element>>('manipulation - empty', 'jquery.html', {
setup($) {
return $('li');
},
await benchmark<Cheerio<Element>>('manipulation - empty', 'jquery.html', {
setup: ($) => $('li'),
test(_, $lis) {
$lis.empty();
},
});
suites.add<Cheerio<Element>>('manipulation - html', 'jquery.html', {
setup($) {
return $('li');
},
await benchmark<Cheerio<Element>>('manipulation - html', 'jquery.html', {
setup: ($) => $('li'),
test(_, $lis) {
$lis.html();
$lis.html('foo');
},
});
suites.add<Cheerio<Element>>('manipulation - html render', 'jquery.html', {
setup($) {
return $('body');
},
await benchmark<Cheerio<Element>>('manipulation - html render', 'jquery.html', {
setup: ($) => $('body'),
test(_, $lis) {
$lis.html();
},
});
suites.add<string>('manipulation - html independent', 'jquery.html', {
setup() {
return (
'<div class="foo"><div id="bar">bat<hr>baz</div> </div>' +
'<div class="foo"><div id="bar">bat<hr>baz</div> </div>' +
'<div class="foo"><div id="bar">bat<hr>baz</div> </div>' +
'<div class="foo"><div id="bar">bat<hr>baz</div> </div>' +
'<div class="foo"><div id="bar">bat<hr>baz</div> </div>' +
'<div class="foo"><div id="bar">bat<hr>baz</div> </div>'
);
},
test($, content) {
$(content).html();
},
const HTML_INDEPENDENT_MARKUP =
'<div class="foo"><div id="bar">bat<hr>baz</div> </div>'.repeat(6);
await benchmark<void>('manipulation - html independent', 'jquery.html', {
setup() {},
test: ($) => $(HTML_INDEPENDENT_MARKUP).html(),
});
suites.add<Cheerio<Element>>('manipulation - text', 'jquery.html', {
setup($) {
return $('li');
},
await benchmark<Cheerio<Element>>('manipulation - text', 'jquery.html', {
setup: ($) => $('li'),
test(_, $lis) {
$lis.text();
$lis.text('foo');
@@ -147,171 +162,103 @@ suites.add<Cheerio<Element>>('manipulation - text', 'jquery.html', {
/*
* Traversing Tests
*/
suites.add<Cheerio<Element>>('traversing - Find', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.find('li').length;
},
await benchmark<Cheerio<Element>>('traversing - Find', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.find('li').length,
});
suites.add<Cheerio<Element>>('traversing - Parent', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.parent('div').length;
},
await benchmark<Cheerio<Element>>('traversing - Parent', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.parent('div').length,
});
suites.add<Cheerio<Element>>('traversing - Parents', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.parents('div').length;
},
await benchmark<Cheerio<Element>>('traversing - Parents', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.parents('div').length,
});
suites.add<Cheerio<Element>>('traversing - Closest', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.closest('div').length;
},
await benchmark<Cheerio<Element>>('traversing - Closest', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.closest('div').length,
});
suites.add<Cheerio<Element>>('traversing - next', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.next().length;
},
await benchmark<Cheerio<Element>>('traversing - next', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.next().length,
});
suites.add<Cheerio<Element>>('traversing - nextAll', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.nextAll('li').length;
},
await benchmark<Cheerio<Element>>('traversing - nextAll', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.nextAll('li').length,
});
suites.add<Cheerio<Element>>('traversing - nextUntil', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.nextUntil('li').length;
},
await benchmark<Cheerio<Element>>('traversing - nextUntil', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.nextUntil('li').length,
});
suites.add<Cheerio<Element>>('traversing - prev', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.prev().length;
},
await benchmark<Cheerio<Element>>('traversing - prev', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.prev().length,
});
suites.add<Cheerio<Element>>('traversing - prevAll', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.prevAll('li').length;
},
await benchmark<Cheerio<Element>>('traversing - prevAll', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.prevAll('li').length,
});
suites.add<Cheerio<Element>>('traversing - prevUntil', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.prevUntil('li').length;
},
await benchmark<Cheerio<Element>>('traversing - prevUntil', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.prevUntil('li').length,
});
suites.add<Cheerio<Element>>('traversing - siblings', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.siblings('li').length;
},
await benchmark<Cheerio<Element>>('traversing - siblings', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.siblings('li').length,
});
suites.add<Cheerio<Element>>('traversing - Children', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.children('a').length;
},
await benchmark<Cheerio<Element>>('traversing - Children', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.children('a').length,
});
suites.add<Cheerio<Element>>('traversing - Filter', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.filter('li').length;
},
await benchmark<Cheerio<Element>>('traversing - Filter', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.filter('li').length,
});
suites.add<Cheerio<Element>>('traversing - First', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.first().first().length;
},
await benchmark<Cheerio<Element>>('traversing - First', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.first().first().length,
});
suites.add<Cheerio<Element>>('traversing - Last', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.last().last().length;
},
await benchmark<Cheerio<Element>>('traversing - Last', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.last().last().length,
});
suites.add<Cheerio<Element>>('traversing - Eq', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
return $lis.eq(0).eq(0).length;
},
await benchmark<Cheerio<Element>>('traversing - Eq', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.eq(0).eq(0).length,
});
/*
* Attributes Tests
*/
suites.add<Cheerio<Element>>('attributes - Attributes', 'jquery.html', {
setup($) {
return $('li');
},
await benchmark<Cheerio<Element>>('attributes - Attributes', 'jquery.html', {
setup: ($) => $('li'),
test(_, $lis) {
$lis.attr('foo', 'bar');
$lis.attr('foo');
$lis.removeAttr('foo');
},
});
suites.add<Cheerio<Element>>('attributes - Single Attribute', 'jquery.html', {
setup($) {
return $('body');
},
test(_, $lis) {
$lis.attr('foo', 'bar');
$lis.attr('foo');
$lis.removeAttr('foo');
},
});
suites.add<Cheerio<Element>>('attributes - Data', 'jquery.html', {
setup($) {
return $('li');
await benchmark<Cheerio<Element>>(
'attributes - Single Attribute',
'jquery.html',
{
setup: ($) => $('body'),
test(_, $lis) {
$lis.attr('foo', 'bar');
$lis.attr('foo');
$lis.removeAttr('foo');
},
},
);
await benchmark<Cheerio<Element>>('attributes - Data', 'jquery.html', {
setup: ($) => $('li'),
test(_, $lis) {
$lis.data('foo', 'bar');
$lis.data('foo');
},
});
suites.add<Cheerio<Element>>('attributes - Val', 'jquery.html', {
setup($) {
return $('select,input,textarea,option');
},
await benchmark<Cheerio<Element>>('attributes - Val', 'jquery.html', {
setup: ($) => $('select,input,textarea,option'),
test($, $lis) {
$lis.each(function () {
$(this).val();
@@ -320,28 +267,22 @@ suites.add<Cheerio<Element>>('attributes - Val', 'jquery.html', {
},
});
suites.add<Cheerio<Element>>('attributes - Has class', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
$lis.hasClass('foo');
},
await benchmark<Cheerio<Element>>('attributes - Has class', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.hasClass('foo'),
});
suites.add<Cheerio<Element>>('attributes - Toggle class', 'jquery.html', {
setup($) {
return $('li');
},
test(_, $lis) {
$lis.toggleClass('foo');
},
await benchmark<Cheerio<Element>>('attributes - Toggle class', 'jquery.html', {
setup: ($) => $('li'),
test: (_, $lis) => $lis.toggleClass('foo'),
});
suites.add<Cheerio<Element>>('attributes - Add Remove class', 'jquery.html', {
setup($) {
return $('li');
await benchmark<Cheerio<Element>>(
'attributes - Add Remove class',
'jquery.html',
{
setup: ($) => $('li'),
test(_, $lis) {
$lis.addClass('foo');
$lis.removeClass('foo');
},
},
test(_, $lis) {
$lis.addClass('foo');
$lis.removeClass('foo');
},
});
);

View File

@@ -1,95 +0,0 @@
import fs from 'node:fs';
import path from 'node:path';
import { Script } from 'node:vm';
import { Suite, type Event } from 'benchmark';
import { JSDOM } from 'jsdom';
import cheerio from '../lib/index.js';
const documentDir = path.join(__dirname, 'documents');
const jQuerySrc = fs.readFileSync(
path.join(__dirname, '../node_modules/jquery/dist/jquery.slim.js'),
'utf8',
);
const jQueryScript = new Script(jQuerySrc);
let filterRe = /./;
let cheerioOnly = false;
/** Callbacks that describe one benchmark case; `T` is the type of the prepared data. */
interface SuiteOptions<T> {
/** Body that gets measured; receives the query function and the value `setup` returned. */
test($: typeof cheerio, data: T): void;
/** Called once per library before the case is added to the suite; its return value is handed to `test`. */
setup($: typeof cheerio): T;
}
/**
 * Runner for the benchmark cases. Each `add` call builds one `benchmark`
 * suite that measures cheerio and, unless `cheerioOnly()` was called first,
 * jQuery running inside jsdom.
 *
 * The filter and the cheerio-only switch live in module-level variables, so
 * they apply to every instance.
 */
export default class Suites {
  /**
   * Limits subsequent `add` calls to names matching a pattern.
   *
   * @param str - Source of a regular expression; compiled case-insensitively.
   */
  filter(str: string): void {
    filterRe = new RegExp(str, 'i');
  }

  /** Makes subsequent `add` calls skip the jsdom measurement. */
  cheerioOnly(): void {
    cheerioOnly = true;
  }

  /**
   * Builds and runs the suite for one benchmark case.
   *
   * @param name - Case name; checked against the active filter.
   * @param fileName - Document to load, relative to the documents directory.
   * @param options - Setup and test callbacks of the case.
   */
  add<T>(name: string, fileName: string, options: SuiteOptions<T>): void {
    const selected = filterRe.test(name);
    if (!selected) {
      return;
    }

    const documentPath = path.join(documentDir, fileName);
    const markup = fs.readFileSync(documentPath, 'utf8');
    const suite = new Suite(name);

    suite.on('start', () => {
      console.log(`Test: ${name} (file: ${fileName})`);
    });

    suite.on('cycle', (event: Event) => {
      // Failed cycles are reported by the 'error' listener instead.
      if (!(event.target as any).error) {
        console.log(`\t${String(event.target)}`);
      }
    });

    suite.on('error', (event: Event) => {
      const failedName = event.target.name;
      const failure = (event.target as any).error;
      console.log(`*** Error in ${failedName}: ***`);
      console.log(`\t${failure}`);
      console.log('*** Test invalidated. ***');
    });

    suite.on('complete', function (this: Suite, event: Event) {
      const failed = (event.target as any).error;
      if (failed) {
        console.log();
        return;
      }

      // `filter('fastest')` is read by index only, exactly as before.
      const fastestName = (this.filter('fastest') as any)[0].name;
      console.log(`\tFastest: ${fastestName}\n`);
    });

    this._benchCheerio(suite, markup, options);

    // `_benchJsDom` starts the suite itself once the jsdom case is registered.
    if (!cheerioOnly) {
      this._benchJsDom(suite, markup, options);
      return;
    }
    suite.run();
  }

  /**
   * Registers the jQuery-in-jsdom case on `suite` and then runs the suite.
   *
   * @param suite - Suite to extend and run.
   * @param markup - HTML the jsdom window is created from.
   * @param options - Setup and test callbacks of the case.
   */
  _benchJsDom<T>(suite: Suite, markup: string, options: SuiteOptions<T>): void {
    const runTest = options.test;

    const dom = new JSDOM(markup, { runScripts: 'outside-only' });
    jQueryScript.runInContext(dom.getInternalVMContext());

    const prepared: T = options.setup(dom.window['$']);
    suite.add('jsdom', () => runTest(dom.window['$'], prepared));

    suite.run();
  }

  /**
   * Registers the cheerio case on `suite`; does not run it.
   *
   * @param suite - Suite to extend.
   * @param markup - HTML to load with cheerio.
   * @param options - Setup and test callbacks of the case.
   */
  _benchCheerio<T>(
    suite: Suite,
    markup: string,
    options: SuiteOptions<T>,
  ): void {
    const $ = cheerio.load(markup);
    const runTest = options.test;
    const prepared: T = options.setup($);

    suite.add('cheerio', () => {
      runTest($, prepared);
    });
  }
}

4342
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -2,12 +2,6 @@
"name": "cheerio",
"version": "1.0.0-rc.12",
"description": "The fast, flexible & elegant library for parsing and manipulating HTML and XML.",
"author": "Matt Mueller <mattmuelle@gmail.com>",
"maintainers": [
"Felix Boehm <me@feedic.com>"
],
"funding": "https://github.com/cheeriojs/cheerio?sponsor=1",
"license": "MIT",
"keywords": [
"htmlparser",
"jquery",
@@ -18,44 +12,104 @@
"xml",
"html"
],
"homepage": "https://cheerio.js.org/",
"bugs": {
"url": "https://github.com/cheeriojs/cheerio/issues"
},
"repository": {
"type": "git",
"url": "git://github.com/cheeriojs/cheerio.git"
},
"bugs": {
"url": "https://github.com/cheeriojs/cheerio/issues"
},
"homepage": "https://cheerio.js.org/",
"main": "lib/index.js",
"types": "lib/index.d.ts",
"module": "lib/esm/index.js",
"type": "commonjs",
"funding": "https://github.com/cheeriojs/cheerio?sponsor=1",
"license": "MIT",
"author": "Matt Mueller <mattmuelle@gmail.com>",
"maintainers": [
"Felix Boehm <me@feedic.com>"
],
"type": "module",
"exports": {
".": {
"require": "./lib/index.js",
"browser": {
"types": "./dist/browser/index.d.ts",
"default": "./dist/browser/index.js"
},
"import": {
"node": "./lib/esm/batteries.js",
"default": "./lib/esm/index.js"
"types": "./dist/esm/index.d.ts",
"default": "./dist/esm/index.js"
},
"require": {
"types": "./dist/commonjs/index.d.ts",
"default": "./dist/commonjs/index.js"
}
},
"./lib/slim": {
"require": "./lib/slim.js",
"import": "./lib/esm/slim.js"
"./slim": {
"browser": {
"types": "./dist/browser/slim.d.ts",
"default": "./dist/browser/slim.js"
},
"import": {
"types": "./dist/esm/slim.d.ts",
"default": "./dist/esm/slim.js"
},
"require": {
"types": "./dist/commonjs/slim.d.ts",
"default": "./dist/commonjs/slim.js"
}
},
"./lib/utils": {
"require": "./lib/utils.js",
"import": "./lib/esm/utils.js"
},
"./lib/batteries": {
"require": "./lib/batteries.js",
"import": "./lib/esm/batteries.js"
"./utils": {
"browser": {
"types": "./dist/browser/utils.d.ts",
"default": "./dist/browser/utils.js"
},
"import": {
"types": "./dist/esm/utils.d.ts",
"default": "./dist/esm/utils.js"
},
"require": {
"types": "./dist/commonjs/utils.d.ts",
"default": "./dist/commonjs/utils.js"
}
}
},
"main": "./dist/commonjs/index.js",
"module": "./dist/esm/index.js",
"types": "./dist/commonjs/index.d.ts",
"files": [
"lib"
],
"engines": {
"node": ">= 6"
"scripts": {
"benchmark": "node --import=tsx benchmark/benchmark.ts",
"build": "tshy",
"format": "npm run format:es && npm run format:prettier",
"format:es": "npm run lint:es -- --fix",
"format:prettier": "npm run format:prettier:raw -- --write",
"format:prettier:raw": "prettier \"**/*.{{m,c,}{j,t}s{x,},md{x,},json,y{a,}ml}\" --ignore-path .gitignore",
"lint": "npm run lint:es && npm run lint:prettier",
"lint:es": "eslint --ignore-path .gitignore .",
"lint:prettier": "npm run format:prettier:raw -- --check",
"prepare": "husky install",
"prepublishOnly": "npm run build",
"test": "npm run lint && npm run test:vi",
"test:vi": "vitest run",
"update-sponsors": "tsx scripts/fetch-sponsors.mts"
},
"lint-staged": {
"*.js": [
"prettier --write",
"npm run lint:es -- --fix"
],
"*.{json,md,ts,yml}": [
"prettier --write"
]
},
"prettier": {
"plugins": [
"./node_modules/prettier-plugin-jsdoc/dist/index.js"
],
"proseWrap": "always",
"singleQuote": true,
"tabWidth": 2,
"tsdoc": true
},
"dependencies": {
"cheerio-select": "^2.1.0",
@@ -73,84 +127,48 @@
"devDependencies": {
"@imgix/js-core": "^3.8.0",
"@octokit/graphql": "^8.1.1",
"@types/benchmark": "^2.1.5",
"@types/jest": "^29.5.12",
"@types/jsdom": "^21.1.7",
"@types/node": "^22.1.0",
"@types/whatwg-mimetype": "^3.0.2",
"@typescript-eslint/eslint-plugin": "^7.18.0",
"@typescript-eslint/parser": "^7.18.0",
"benchmark": "^2.1.4",
"@vitest/coverage-v8": "^2.0.5",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-expect-type": "^0.4.0",
"eslint-plugin-jest": "^28.7.0",
"eslint-plugin-jsdoc": "^48.11.0",
"eslint-plugin-n": "^16.6.2",
"eslint-plugin-unicorn": "^55.0.0",
"eslint-plugin-vitest": "^0.5.4",
"husky": "^8.0.3",
"jest": "^29.7.0",
"jquery": "^3.7.1",
"jsdom": "^24.1.1",
"lint-staged": "^15.2.8",
"prettier": "^3.3.3",
"prettier-plugin-jsdoc": "^1.3.0",
"ts-jest": "^29.2.4",
"tinybench": "^2.9.0",
"tshy": "^3.0.2",
"tsx": "^4.16.5",
"typescript": "^5.5.4"
"typescript": "^5.5.4",
"vitest": "^2.0.5"
},
"scripts": {
"test": "npm run lint && npm run test:jest",
"test:jest": "jest",
"test:jest:cov": "npm run test:jest -- --coverage",
"lint": "npm run lint:es && npm run lint:prettier",
"lint:es": "eslint --ignore-path .gitignore .",
"lint:prettier": "npm run format:prettier:raw -- --check",
"format": "npm run format:es && npm run format:prettier",
"format:es": "npm run lint:es -- --fix",
"format:prettier": "npm run format:prettier:raw -- --write",
"format:prettier:raw": "prettier \"**/*.{{m,c,}{j,t}s{x,},md{x,},json,y{a,}ml}\" --ignore-path .gitignore",
"benchmark": "npm run build:cjs && ts-node benchmark/benchmark.ts --regex \"^(?!.*highmem)\"",
"update-sponsors": "tsx scripts/fetch-sponsors.mts",
"bench": "npm run benchmark",
"build": "npm run build:cjs && npm run build:esm",
"make-esm": "sed 's/\"type\": \"commonjs\"/\"type\": \"module\"/g' package.json > tmp.json && mv tmp.json package.json",
"make-cjs": "sed 's/\"type\": \"module\"/\"type\": \"commonjs\"/g' package.json > tmp.json && mv tmp.json package.json",
"build:cjs": "tsc --sourceRoot https://raw.githubusercontent.com/cheeriojs/cheerio/$(git rev-parse HEAD)/src/",
"build:esm": "npm run make-esm && npm run build:cjs -- --target es2019 --outDir lib/esm && echo '{\"type\":\"module\"}' > lib/esm/package.json; npm run make-cjs",
"prepublishOnly": "npm run build",
"prepare": "husky install"
"engines": {
"node": ">= 16"
},
"prettier": {
"singleQuote": true,
"tabWidth": 2,
"proseWrap": "always",
"plugins": [
"./node_modules/prettier-plugin-jsdoc/dist/index.js"
"tshy": {
"esmDialects": [
"browser"
],
"tsdoc": true
},
"lint-staged": {
"*.js": [
"prettier --write",
"npm run lint:es -- --fix"
],
"*.{json,md,ts,yml}": [
"prettier --write"
"exports": {
".": "./src/index.ts",
"./slim": "./src/slim.ts",
"./utils": "./src/utils.ts"
},
"exclude": [
"**/*.spec.ts",
"**/__fixtures__/*",
"**/__tests__/*",
"**/__snapshots__/*"
]
},
"jest": {
"preset": "ts-jest",
"testEnvironment": "node",
"testPathIgnorePatterns": [
"/__fixtures__/"
],
"coverageProvider": "v8",
"moduleNameMapper": {
"^(.*)\\.js$": [
"$1.js",
"$1"
]
}
}
}

View File

@@ -3,6 +3,7 @@
* removed in the next major release of Cheerio, but their stability should be
* maintained until that time.
*/
import { describe, it, expect, beforeEach } from 'vitest';
import * as fixtures from '../__fixtures__/fixtures.js';
import cheerio from '../index.js';

View File

@@ -1,14 +1,15 @@
import cheerio from '../index.js';
import { describe, it, expect } from 'vitest';
import { load } from '../index.js';
import type { CheerioOptions } from '../options.js';
function xml(str: string, options?: CheerioOptions) {
options = { xml: true, ...options };
const $ = cheerio.load(str, options);
const $ = load(str, options);
return $.xml();
}
function dom(str: string, options?: CheerioOptions) {
const $ = cheerio.load('', options);
const $ = load('', options);
return $(str).html();
}
@@ -33,7 +34,7 @@ describe('render', () => {
});
it('should render HTML as XML', () => {
const $ = cheerio.load('<foo></foo>', null, false);
const $ = load('<foo></foo>', null, false);
expect($.xml()).toBe('<foo/>');
});
});
@@ -55,7 +56,7 @@ describe('render', () => {
it('should maintain the parsing options of distinct contexts independently', () => {
const str = '<g><someElem someAttribute="something">hello</someElem></g>';
const $ = cheerio.load('', { xml: false });
const $ = load('', { xml: false });
expect($(str).html()).toBe(
'<someelem someattribute="something">hello</someelem>',

View File

@@ -1,3 +1,4 @@
import { describe, it, expect, beforeEach } from 'vitest';
import cheerio, { load, type CheerioAPI, type Cheerio } from '../index.js';
import type { Element } from 'domhandler';
import {

View File

@@ -1,3 +1,4 @@
import { describe, it, expect, beforeEach } from 'vitest';
import cheerio, { load, type Cheerio } from '../index.js';
import type { Element } from 'domhandler';
import { mixedText } from '../__fixtures__/fixtures.js';

View File

@@ -1,5 +1,6 @@
import { describe, it, expect } from 'vitest';
import * as fixtures from '../__fixtures__/fixtures.js';
import cheerio from '..';
import cheerio from '../index-browser.js';
interface RedSelObject {
red: string | undefined;

View File

@@ -1,3 +1,4 @@
import { describe, it, expect, beforeEach } from 'vitest';
import cheerio, { type CheerioAPI } from '../index.js';
import { forms } from '../__fixtures__/fixtures.js';

View File

@@ -1,3 +1,4 @@
import { describe, it, expect, beforeEach } from 'vitest';
import { load, type CheerioAPI, type Cheerio } from '../index.js';
import {
fruits,

View File

@@ -1,3 +1,4 @@
import { describe, it, expect, beforeEach } from 'vitest';
import cheerio, { type CheerioAPI } from '../index.js';
import { Cheerio } from '../cheerio.js';
import { type AnyNode, type Element, type Text, isText } from 'domhandler';

View File

@@ -1,167 +0,0 @@
/* eslint-disable jest/no-done-callback */
import * as cheerio from './batteries.js';
import { Writable } from 'node:stream';
import { createServer, type Server } from 'node:http';
/** Callback that intentionally does nothing, for calls that require a callback whose result is irrelevant. */
function noop() {
// Ignore
}
/** Markup used as input by the tests in this file. */
const TEST_HTML = '<h1>Hello World</h1>';
/** `TEST_HTML` encoded as UTF-16LE, without a byte-order mark. */
const TEST_HTML_UTF16 = Buffer.from(TEST_HTML, 'utf16le');
/** The two bytes of the UTF-16LE byte-order mark. */
const UTF16_LE_BOM = Buffer.from([0xff, 0xfe]);
/** `TEST_HTML_UTF16` preceded by the UTF-16LE byte-order mark. */
const TEST_HTML_UTF16_BOM = Buffer.concat([UTF16_LE_BOM, TEST_HTML_UTF16]);
describe('loadBuffer', () => {
  // Both encodings are expected to serialize to the same document.
  const expectedDocument = `<html><head></head><body>${TEST_HTML}</body></html>`;

  it('should parse UTF-8 HTML', () => {
    const utf8Input = Buffer.from(TEST_HTML);
    const $ = cheerio.loadBuffer(utf8Input);

    expect($.html()).toBe(expectedDocument);
  });

  it('should parse UTF-16 HTML', () => {
    const $ = cheerio.loadBuffer(TEST_HTML_UTF16_BOM);

    expect($.html()).toBe(expectedDocument);
  });
});
describe('stringStream', () => {
  it('should use parse5 by default', (done) => {
    const expectedDocument = `<html><head></head><body>${TEST_HTML}</body></html>`;

    const stream = cheerio.stringStream({}, (err, $) => {
      expect(err).toBeUndefined();
      expect($.html()).toBe(expectedDocument);
      done();
    });

    expect(stream).toBeInstanceOf(Writable);
    stream.end(TEST_HTML);
  });

  it('should error from parse5 on buffer', () => {
    const stream = cheerio.stringStream({}, noop);
    expect(stream).toBeInstanceOf(Writable);

    // Writing a Buffer instead of a string has to throw synchronously.
    const writeBuffer = () => stream.write(Buffer.from(TEST_HTML));
    expect(writeBuffer).toThrow('Parser can work only with string streams.');
  });

  it('should use htmlparser2 for XML', (done) => {
    const stream = cheerio.stringStream({ xml: true }, (err, $) => {
      expect(err).toBeNull();
      expect($.html()).toBe(TEST_HTML);
      done();
    });

    expect(stream).toBeInstanceOf(Writable);
    stream.end(TEST_HTML);
  });
});
describe('decodeStream', () => {
  it('should use parse5 by default', (done) => {
    const expectedDocument = `<html><head></head><body>${TEST_HTML}</body></html>`;

    const stream = cheerio.decodeStream({}, (err, $) => {
      expect(err).toBeUndefined();
      expect($.html()).toBe(expectedDocument);
      done();
    });

    expect(stream).toBeInstanceOf(Writable);
    // The input carries a UTF-16LE byte-order mark.
    stream.end(TEST_HTML_UTF16_BOM);
  });

  it('should use htmlparser2 for XML', (done) => {
    const stream = cheerio.decodeStream({ xmlMode: true }, (err, $) => {
      expect(err).toBeNull();
      expect($.html()).toBe(TEST_HTML);
      done();
    });

    expect(stream).toBeInstanceOf(Writable);
    stream.end(TEST_HTML_UTF16_BOM);
  });
});
describe('fromURL', () => {
  // Server started by the current test, if any; closed again in `afterEach`.
  let server: Server | undefined;

  /**
   * Starts an HTTP server on a free port that answers every request with
   * status 200, the given `Content-Type` and the given body.
   *
   * @returns The port the server listens on.
   */
  function createTestServer(
    contentType: string,
    body: string | Buffer,
  ): Promise<number> {
    return new Promise((resolve, reject) => {
      server = createServer((_req, res) => {
        res.writeHead(200, { 'Content-Type': contentType });
        res.end(body);
      });

      // Port 0 lets the operating system choose a free port.
      server.listen(0, () => {
        const address = server?.address();
        if (address != null && typeof address !== 'string') {
          resolve(address.port);
          return;
        }
        reject(new Error('Failed to get port'));
      });
    });
  }

  afterEach((done) => {
    if (!server) {
      done();
      return;
    }
    server.close(done);
    server = undefined;
  });

  it('should fetch UTF-8 HTML', async () => {
    const port = await createTestServer('text/html', TEST_HTML);
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    const expectedDocument = `<html><head></head><body>${TEST_HTML}</body></html>`;
    expect($.html()).toBe(expectedDocument);
  });

  it('should fetch UTF-16 HTML', async () => {
    const contentType = 'text/html; charset=utf-16le';
    const port = await createTestServer(contentType, TEST_HTML_UTF16);
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    const expectedDocument = `<html><head></head><body>${TEST_HTML}</body></html>`;
    expect($.html()).toBe(expectedDocument);
  });

  it('should parse XML based on Content-Type', async () => {
    const port = await createTestServer('text/xml', TEST_HTML);
    const $ = await cheerio.fromURL(`http://localhost:${port}`);

    expect($.html()).toBe(TEST_HTML);
  });
});

View File

@@ -1,265 +0,0 @@
/**
* @file Batteries-included version of Cheerio. This module includes several
* convenience methods for loading documents from various sources.
*/
export * from './index.js';
/* eslint-disable n/no-unsupported-features/node-builtins */
import type { CheerioAPI, CheerioOptions } from './index.js';
import { load } from './index.js';
import { flattenOptions, type InternalOptions } from './options.js';
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
import * as htmlparser2 from 'htmlparser2';
import { ParserStream as Parse5Stream } from 'parse5-parser-stream';
import {
decodeBuffer,
DecodeStream,
type SnifferOptions,
} from 'encoding-sniffer';
import * as undici from 'undici';
import MIMEType from 'whatwg-mimetype';
import { Writable, finished } from 'node:stream';
/**
* Sniffs the encoding of a buffer, then creates a querying function bound to a
* document created from the buffer.
*
* @category Loading
* @example
*
* ```js
* import * as cheerio from 'cheerio';
* import * as fs from 'fs';
*
* const buffer = fs.readFileSync('index.html');
* const $ = cheerio.loadBuffer(buffer);
* ```
*
* @param buffer - The buffer to sniff the encoding of.
* @param options - The options to pass to Cheerio. `options.encoding` is
*   forwarded to the encoding sniffer.
* @returns The loaded document.
*/
export function loadBuffer(
buffer: Buffer,
options: DecodeStreamOptions = {},
): CheerioAPI {
const opts = flattenOptions(options);
// The fallback encoding is UTF-8 in XML mode and windows-1252 otherwise;
// entries in `options.encoding` are spread last and therefore win.
const str = decodeBuffer(buffer, {
defaultEncoding: opts?.xmlMode ? 'utf8' : 'windows-1252',
...options.encoding,
});
return load(str, opts);
}
/**
 * Creates the writable stream shared by `stringStream` and `decodeStream`.
 *
 * With `_useHtmlParser2` set, string chunks are fed to an htmlparser2 document
 * stream. Otherwise a parse5 parser stream is returned, using the htmlparser2
 * tree adapter unless one is configured and with scripting enabled unless it
 * was explicitly disabled.
 *
 * @param options - Flattened options. On the parse5 path the defaults are
 *   written onto this object.
 * @param cb - Called with the loaded document once parsing has ended.
 * @returns The stream that accepts the document's string chunks.
 */
function _stringStream(
  options: InternalOptions | undefined,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  if (!options?._useHtmlParser2) {
    const parse5Options = options ?? {};
    parse5Options.treeAdapter ??= htmlparser2Adapter;
    if (parse5Options.scriptingEnabled !== false) {
      parse5Options.scriptingEnabled = true;
    }

    const parse5Stream = new Parse5Stream(parse5Options);
    // The document is handed to the callback once the stream has finished.
    finished(parse5Stream, (err) => cb(err, load(parse5Stream.document)));

    return parse5Stream;
  }

  const parser = htmlparser2.createDocumentStream(
    (err, document) => cb(err, load(document)),
    options,
  );

  return new Writable({
    // Chunks have to arrive as strings, so they must not be turned into Buffers.
    decodeStrings: false,
    write(chunk, _encoding, callback) {
      if (typeof chunk === 'string') {
        parser.write(chunk);
        callback();
        return;
      }
      throw new TypeError('Expected a string');
    },
    final(callback) {
      parser.end();
      callback();
    },
  });
}
/**
 * Creates a `Writable` stream that turns a sequence of string chunks into a
 * document.
 *
 * Write strings to the returned stream. Once the stream has finished, `cb` is
 * called with the loaded document (or with an error).
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 * import * as fs from 'fs';
 *
 * const writeStream = cheerio.stringStream({}, (err, $) => {
 *   if (err) {
 *     // Handle error
 *   }
 *
 *   console.log($('h1').text());
 *   // Output: Hello, world!
 * });
 *
 * fs.createReadStream('my-document.html', { encoding: 'utf8' }).pipe(
 *   writeStream,
 * );
 * ```
 *
 * @param options - The options to pass to Cheerio.
 * @param cb - The callback to call when the stream is finished.
 * @returns The writable stream.
 */
export function stringStream(
  options: CheerioOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  // Normalise the public options first, then delegate to the shared builder.
  const flattened = flattenOptions(options);
  return _stringStream(flattened, cb);
}
/**
 * Cheerio options extended with settings for encoding detection. Accepted by
 * `loadBuffer` and `decodeStream`.
 */
export interface DecodeStreamOptions extends CheerioOptions {
/** Options handed to `encoding-sniffer` when decoding the input. */
encoding?: SnifferOptions;
}
/**
 * Parses a stream of buffers into a document.
 *
 * The stream is a `Writable` stream that accepts buffers. The buffers are
 * decoded by an `encoding-sniffer` `DecodeStream` and piped into the string
 * parser. When the stream is finished, the callback is called with the loaded
 * document.
 *
 * Unless `options.encoding.defaultEncoding` is set, the default encoding is
 * `utf8` in XML mode and `windows-1252` otherwise. The `options.encoding`
 * object passed in is not modified.
 *
 * @category Loading
 * @param options - The options to pass to Cheerio.
 * @param cb - The callback to call when the stream is finished.
 * @returns The writable stream.
 */
export function decodeStream(
  options: DecodeStreamOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  const { encoding = {}, ...cheerioOptions } = options;
  const opts = flattenOptions(cheerioOptions);
  /*
   * Build a fresh options object instead of assigning to `encoding`: writing
   * the fallback into a caller-owned object would make it stick for later
   * calls that reuse the object with a different `xmlMode`.
   */
  const snifferOptions: SnifferOptions = {
    ...encoding,
    // Set the default encoding to UTF-8 for XML mode
    defaultEncoding:
      encoding.defaultEncoding ?? (opts?.xmlMode ? 'utf8' : 'windows-1252'),
  };
  const decoder = new DecodeStream(snifferOptions);
  const loadStream = _stringStream(opts, cb);
  decoder.pipe(loadStream);
  return decoder;
}
/** The type of the second argument (the request options) of `undici.stream`. */
type UndiciStreamOptions = Parameters<typeof undici.stream>[1];
/** Options accepted by `fromURL`: decoding options plus request options. */
export interface CheerioRequestOptions extends DecodeStreamOptions {
/** The options passed to `undici`'s `stream` method. */
requestOptions?: UndiciStreamOptions;
}
/**
 * Request options used by `fromURL` when the caller supplies none. The
 * `headers` entry is also used when supplied request options lack headers.
 */
const defaultRequestOptions: UndiciStreamOptions = {
method: 'GET',
// Allow redirects by default
maxRedirections: 5,
// NOTE: `throwOnError` currently doesn't work https://github.com/nodejs/undici/issues/1753
throwOnError: true,
// Set an Accept header
headers: {
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
},
};
/**
 * `fromURL` loads a document from a URL.
 *
 * By default, redirects are allowed and non-2xx responses are rejected.
 *
 * The response's `content-type` header decides how the body is handled: it
 * must be an HTML or XML type (a `RangeError` is thrown otherwise), an XML
 * type switches on `xmlMode`, and a `charset` parameter is forwarded to the
 * decoder. The `baseURI` of the loaded document is the last URL in the
 * redirect history when one is present, or `url` otherwise. Values in
 * `options` override the ones derived from the response.
 *
 * The `options.requestOptions` and `options.encoding` objects are not
 * modified.
 *
 * @category Loading
 * @example
 *
 * ```js
 * import * as cheerio from 'cheerio';
 *
 * const $ = await cheerio.fromURL('https://example.com');
 * ```
 *
 * @param url - The URL to load the document from.
 * @param options - The options to pass to Cheerio.
 * @returns The loaded document.
 */
export async function fromURL(
  url: string | URL,
  options: CheerioRequestOptions = {},
): Promise<CheerioAPI> {
  const {
    requestOptions = defaultRequestOptions,
    encoding = {},
    ...cheerioOptions
  } = options;
  let undiciStream: Promise<undici.Dispatcher.StreamData> | undefined;

  // Add headers if none were supplied, without touching the caller's object.
  const streamOptions: UndiciStreamOptions = {
    ...requestOptions,
    headers: requestOptions.headers ?? defaultRequestOptions.headers,
  };

  const promise = new Promise<CheerioAPI>((resolve, reject) => {
    undiciStream = undici.stream(url, streamOptions, (res) => {
      const contentType = res.headers['content-type'] ?? 'text/html';
      const mimeType = new MIMEType(
        Array.isArray(contentType) ? contentType[0] : contentType,
      );

      if (!mimeType.isHTML() && !mimeType.isXML()) {
        throw new RangeError(
          `The content-type "${contentType}" is neither HTML nor XML.`,
        );
      }

      /*
       * If we allow redirects, we will have entries in the history.
       * The last entry will be the final URL.
       */
      const history = (
        res.context as
          | {
              history?: URL[];
            }
          | undefined
      )?.history;

      // Typed explicitly so that a misspelt option name is a compile error.
      const opts: DecodeStreamOptions = {
        // Forward the charset from the header to the decodeStream.
        encoding: {
          ...encoding,
          transportLayerEncodingLabel: mimeType.parameters.get('charset'),
        },
        // Set XML mode based on the MIME type.
        xmlMode: mimeType.isXML(),
        // Set the `baseURI` to the final URL.
        baseURI: history ? history[history.length - 1] : url,
        ...cheerioOptions,
      };

      return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($)));
    });
  });

  // Let's make sure the request is completed before returning the promise.
  await undiciStream;
  return promise;
}

View File

@@ -1,3 +1,4 @@
import { describe, it, expect } from 'vitest';
import { parseDOM } from 'htmlparser2';
import cheerio, { type Cheerio } from './index.js';
import { fruits, food, noscript } from './__fixtures__/fixtures.js';
@@ -66,19 +67,19 @@ describe('cheerio', () => {
expect($script[0].childNodes).toHaveLength(0);
});
// eslint-disable-next-line jest/expect-expect
// eslint-disable-next-line vitest/expect-expect
it('should be able to select .apple with only a context', () => {
const $apple = cheerio('.apple', fruits);
testAppleSelect($apple);
});
// eslint-disable-next-line jest/expect-expect
// eslint-disable-next-line vitest/expect-expect
it('should be able to select .apple with a node as context', () => {
const $apple = cheerio('.apple', cheerio(fruits)[0]);
testAppleSelect($apple);
});
// eslint-disable-next-line jest/expect-expect
// eslint-disable-next-line vitest/expect-expect
it('should be able to select .apple with only a root', () => {
const $apple = cheerio('.apple', null, fruits);
testAppleSelect($apple);
@@ -123,19 +124,19 @@ describe('cheerio', () => {
});
});
// eslint-disable-next-line jest/expect-expect
// eslint-disable-next-line vitest/expect-expect
it('should be able to do: cheerio("#fruits .apple")', () => {
const $apple = cheerio('#fruits .apple', fruits);
testAppleSelect($apple);
});
// eslint-disable-next-line jest/expect-expect
// eslint-disable-next-line vitest/expect-expect
it('should be able to do: cheerio("li.apple")', () => {
const $apple = cheerio('li.apple', fruits);
testAppleSelect($apple);
});
// eslint-disable-next-line jest/expect-expect
// eslint-disable-next-line vitest/expect-expect
it('should be able to select by attributes', () => {
const $apple = cheerio('li[class=apple]', fruits);
testAppleSelect($apple);
@@ -404,7 +405,7 @@ describe('cheerio', () => {
describe('parse5 options', () => {
// Should parse noscript tags only with false option value
test('{scriptingEnabled: ???}', () => {
it('{scriptingEnabled: ???}', () => {
// [default] `scriptingEnabled: true` - tag contains one text element
const withScripts = cheerio.load(noscript)('noscript');
expect(withScripts).toHaveLength(1);
@@ -432,7 +433,7 @@ describe('cheerio', () => {
});
// Should contain location data only with truthful option value
test('{sourceCodeLocationInfo: ???}', () => {
it('{sourceCodeLocationInfo: ???}', () => {
// Location data should not be present
for (const val of [undefined, null, 0, false, '']) {
const options = { sourceCodeLocationInfo: val as never };

30
src/index-browser.spec.ts Normal file
View File

@@ -0,0 +1,30 @@
import { describe, it, expect } from 'vitest';
import * as cheerio from './index-browser.js';
import * as statics from './static.js';
// Checks that the browser entry point re-exports the static helpers.
describe('static method re-exports', () => {
  const markup = '<div>test div</div>';

  it('should export expected static methods', () => {
    const staticKeys = Object.keys(statics) as (keyof typeof statics)[];

    for (const key of staticKeys) {
      // `extract` is left out of this comparison.
      if (key !== 'extract') {
        expect(typeof cheerio[key]).toBe(typeof statics[key]);
      }
    }
  });

  it('should have a functional `html` that is bound to the default instance', () => {
    const $div = cheerio.default(markup);
    expect(cheerio.html($div)).toBe('<div>test div</div>');
  });

  it('should have a functional `xml` that is bound to the default instance', () => {
    const $div = cheerio.default(markup);
    expect(cheerio.xml($div)).toBe('<div>test div</div>');
  });

  it('should have a functional `text` that is bound to the default instance', () => {
    const $div = cheerio.default(markup);
    expect(cheerio.text($div)).toBe('test div');
  });
});

148
src/index-browser.ts Normal file
View File

@@ -0,0 +1,148 @@
import { type CheerioAPI, getLoad } from './load.js';
import { getParse } from './parse.js';
import { renderWithParse5, parseWithParse5 } from './parsers/parse5-adapter.js';
import * as staticMethods from './static.js';
import type { BasicAcceptedElems } from './types.js';
import type { CheerioOptions } from './options.js';
import renderWithHtmlparser2 from 'dom-serializer';
import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';
import type { AnyNode } from 'domhandler';
/**
* The main types of Cheerio objects.
*
* @category Cheerio
*/
export type { Cheerio } from './cheerio.js';
/**
* Types used in signatures of Cheerio methods.
*
* @category Cheerio
*/
export * from './types.js';
export type { CheerioOptions, HTMLParser2Options } from './options.js';
export type { CheerioAPI } from './load.js';
export { contains, merge } from './static.js';
// Parser used by `load`: htmlparser2 when `_useHtmlParser2` is set, parse5 otherwise.
const parse = getParse((content, options, isDocument, context) => {
  if (options._useHtmlParser2) {
    return parseWithHtmlparser2(content, options);
  }
  return parseWithParse5(content, options, isDocument, context);
});
// Duplicate docs due to https://github.com/TypeStrong/typedoc/issues/1616
/**
 * Create a querying function, bound to a document created from the provided
 * markup.
 *
 * As in a web browser, loading markup may add `<html>`, `<head>`, and
 * `<body>` elements. Pass `false` for `isDocument` to parse the content as a
 * fragment instead, which disables this.
 *
 * @category Loading
 * @param content - Markup to be loaded.
 * @param options - Options for the created instance.
 * @param isDocument - Allows parser to be switched to fragment mode.
 * @returns The loaded document.
 * @see {@link https://cheerio.js.org#loading} for additional usage information.
 */
export const load: (
  content: string | AnyNode | AnyNode[] | Buffer,
  options?: CheerioOptions | null,
  isDocument?: boolean,
) => CheerioAPI = getLoad(parse, (dom, options) => {
  // Serialise with the library that matches the parser that was selected.
  if (options._useHtmlParser2) {
    return renderWithHtmlparser2(dom, options);
  }
  return renderWithParse5(dom);
});
/*
 * Instance loaded from an empty node list. The deprecated module-level
 * helpers below are bound to it, so `this` inside each static method is this
 * instance.
 */
const defaultInstance: CheerioAPI = load([]);
/**
 * The default cheerio instance.
 *
 * @deprecated Use the function returned by `load` instead. To access load, make
 *   sure you are importing `* as cheerio` instead of this default export.
 * @category Deprecated
 */
export default defaultInstance;
/**
 * Renders the document.
 *
 * @deprecated Use `html` on the loaded instance instead.
 * @category Deprecated
 * @param dom - Element to render.
 * @param options - Options for the renderer.
 * @returns The rendered document.
 */
export const html: (
dom: BasicAcceptedElems<AnyNode>,
options?: CheerioOptions,
) => string = staticMethods.html.bind(defaultInstance);
/**
 * Render the document as XML.
 *
 * @deprecated Use `xml` on the loaded instance instead.
 * @category Deprecated
 * @param dom - Element to render.
 * @returns The rendered document.
 */
export const xml: (dom: BasicAcceptedElems<AnyNode>) => string =
staticMethods.xml.bind(defaultInstance);
/**
 * Render the document as text.
 *
 * This returns the `textContent` of the passed elements. The result will
 * include the contents of `<script>` and `<style>` elements. To avoid this, use
 * `.prop('innerText')` instead.
 *
 * @deprecated Use `text` on the loaded instance instead.
 * @category Deprecated
 * @param elements - Elements to render.
 * @returns The rendered document.
 */
export const text: (elements: ArrayLike<AnyNode>) => string =
staticMethods.text.bind(defaultInstance);
/**
 * The `.parseHTML` method exported by the Cheerio module is deprecated.
 *
 * In order to promote consistency with the jQuery library, users are encouraged
 * to instead use the static method of the same name as it is defined on the
 * "loaded" Cheerio factory function.
 *
 * @deprecated Use `parseHTML` on the loaded instance instead.
 * @category Deprecated
 * @example
 *
 * ```js
 * const $ = cheerio.load('');
 * $.parseHTML('<b>markup</b>');
 * ```
 */
// The cast restores the static method's own type on the bound function.
export const parseHTML = staticMethods.parseHTML.bind(
defaultInstance,
) as typeof staticMethods.parseHTML;
/**
 * The `.root` method exported by the Cheerio module is deprecated.
 *
 * Users seeking to access the top-level element of a parsed document should
 * instead use the `root` static method of a "loaded" Cheerio function.
 *
 * @deprecated Use `root` on the loaded instance instead.
 * @category Deprecated
 * @example
 *
 * ```js
 * const $ = cheerio.load('');
 * $.root();
 * ```
 */
// The cast restores the static method's own type on the bound function.
export const root = staticMethods.root.bind(
defaultInstance,
) as typeof staticMethods.root;

View File

@@ -1,29 +1,180 @@
import { describe, it, expect, afterEach } from 'vitest';
import * as cheerio from './index.js';
import * as statics from './static.js';
import { Writable } from 'node:stream';
import { createServer, type Server } from 'node:http';
describe('static method re-exports', () => {
it('should export expected static methods', () => {
for (const key of Object.keys(statics) as (keyof typeof statics)[]) {
if (key === 'extract') continue;
expect(typeof cheerio[key]).toBe(typeof statics[key]);
}
function noop() {
// Ignore
}
// Returns a promise and a resolve function
function getPromise() {
let cb: (error: Error | null | undefined, $: cheerio.CheerioAPI) => void;
const promise = new Promise<cheerio.CheerioAPI>((resolve, reject) => {
cb = (error, $) => (error ? reject(error) : resolve($));
});
it('should have a functional `html` that is bound to the default instance', () => {
expect(cheerio.html(cheerio.default('<div>test div</div>'))).toBe(
'<div>test div</div>',
return { promise, cb: cb! };
}
const TEST_HTML = '<h1>Hello World</h1>';
const TEST_HTML_UTF16 = Buffer.from(TEST_HTML, 'utf16le');
const TEST_HTML_UTF16_BOM = Buffer.from([
// UTF16-LE BOM
0xff,
0xfe,
...Array.from(TEST_HTML_UTF16),
]);
describe('loadBuffer', () => {
it('should parse UTF-8 HTML', () => {
const $ = cheerio.loadBuffer(Buffer.from(TEST_HTML));
expect($.html()).toBe(
`<html><head></head><body>${TEST_HTML}</body></html>`,
);
});
it('should have a functional `xml` that is bound to the default instance', () => {
expect(cheerio.xml(cheerio.default('<div>test div</div>'))).toBe(
'<div>test div</div>',
);
});
it('should parse UTF-16 HTML', () => {
const $ = cheerio.loadBuffer(TEST_HTML_UTF16_BOM);
it('should have a functional `text` that is bound to the default instance', () => {
expect(cheerio.text(cheerio.default('<div>test div</div>'))).toBe(
'test div',
expect($.html()).toBe(
`<html><head></head><body>${TEST_HTML}</body></html>`,
);
});
});
describe('stringStream', () => {
it('should use parse5 by default', async () => {
const { promise, cb } = getPromise();
const stream = cheerio.stringStream({}, cb);
expect(stream).toBeInstanceOf(Writable);
stream.end(TEST_HTML);
const $ = await promise;
expect($.html()).toBe(
`<html><head></head><body>${TEST_HTML}</body></html>`,
);
});
it('should error from parse5 on buffer', () => {
const stream = cheerio.stringStream({}, noop);
expect(stream).toBeInstanceOf(Writable);
expect(() => stream.write(Buffer.from(TEST_HTML))).toThrow(
'Parser can work only with string streams.',
);
});
it('should use htmlparser2 for XML', async () => {
const { promise, cb } = getPromise();
const stream = cheerio.stringStream({ xmlMode: true }, cb);
expect(stream).toBeInstanceOf(Writable);
stream.end(TEST_HTML);
const $ = await promise;
expect($.html()).toBe(TEST_HTML);
});
});
describe('decodeStream', () => {
it('should use parse5 by default', async () => {
const { promise, cb } = getPromise();
const stream = cheerio.decodeStream({}, cb);
expect(stream).toBeInstanceOf(Writable);
stream.end(TEST_HTML_UTF16_BOM);
const $ = await promise;
expect($.html()).toBe(
`<html><head></head><body>${TEST_HTML}</body></html>`,
);
});
it('should use htmlparser2 for XML', async () => {
const { promise, cb } = getPromise();
const stream = cheerio.decodeStream({ xmlMode: true }, cb);
expect(stream).toBeInstanceOf(Writable);
stream.end(TEST_HTML_UTF16_BOM);
const $ = await promise;
expect($.html()).toBe(TEST_HTML);
});
});
describe('fromURL', () => {
let server: Server | undefined;
function createTestServer(
contentType: string,
body: string | Buffer,
): Promise<number> {
return new Promise((resolve, reject) => {
server = createServer((_req, res) => {
res.writeHead(200, { 'Content-Type': contentType });
res.end(body);
});
server.listen(0, () => {
const address = server?.address();
if (typeof address === 'string' || address == null) {
reject(new Error('Failed to get port'));
} else {
resolve(address.port);
}
});
});
}
afterEach(
async () =>
new Promise<void>((resolve, reject) => {
if (server) {
server.close((err) => (err ? reject(err) : resolve()));
server = undefined;
} else {
resolve();
}
}),
);
it('should fetch UTF-8 HTML', async () => {
const port = await createTestServer('text/html', TEST_HTML);
const $ = await cheerio.fromURL(`http://localhost:${port}`);
expect($.html()).toBe(
`<html><head></head><body>${TEST_HTML}</body></html>`,
);
});
it('should fetch UTF-16 HTML', async () => {
const port = await createTestServer(
'text/html; charset=utf-16le',
TEST_HTML_UTF16,
);
const $ = await cheerio.fromURL(`http://localhost:${port}`);
expect($.html()).toBe(
`<html><head></head><body>${TEST_HTML}</body></html>`,
);
});
it('should parse XML based on Content-Type', async () => {
const port = await createTestServer('text/xml', TEST_HTML);
const $ = await cheerio.fromURL(`http://localhost:${port}`);
expect($.html()).toBe(TEST_HTML);
});
});

View File

@@ -1,148 +1,267 @@
import { type CheerioAPI, getLoad } from './load.js';
import { getParse } from './parse.js';
import { renderWithParse5, parseWithParse5 } from './parsers/parse5-adapter.js';
import * as staticMethods from './static.js';
import type { BasicAcceptedElems } from './types.js';
import type { CheerioOptions } from './options.js';
import renderWithHtmlparser2 from 'dom-serializer';
import { parseDocument as parseWithHtmlparser2 } from 'htmlparser2';
import type { AnyNode } from 'domhandler';
/**
* The main types of Cheerio objects.
*
* @category Cheerio
* @file Batteries-included version of Cheerio. This module includes several
* convenience methods for loading documents from various sources.
*/
export type { Cheerio } from './cheerio.js';
export * from './index-browser.js';
// TODO: Remove this
export { default } from './index-browser.js';
/* eslint-disable n/no-unsupported-features/node-builtins */
import type { CheerioAPI, CheerioOptions } from './index-browser.js';
import { load } from './index-browser.js';
import { flattenOptions, type InternalOptions } from './options.js';
import { adapter as htmlparser2Adapter } from 'parse5-htmlparser2-tree-adapter';
import * as htmlparser2 from 'htmlparser2';
import { ParserStream as Parse5Stream } from 'parse5-parser-stream';
import {
decodeBuffer,
DecodeStream,
type SnifferOptions,
} from 'encoding-sniffer';
import * as undici from 'undici';
import MIMEType from 'whatwg-mimetype';
import { Writable, finished } from 'node:stream';
/**
* Types used in signatures of Cheerio methods.
*
* @category Cheerio
*/
export * from './types.js';
export type { CheerioOptions, HTMLParser2Options } from './options.js';
export type { CheerioAPI } from './load.js';
export { contains, merge } from './static.js';
const parse = getParse((content, options, isDocument, context) =>
options._useHtmlParser2
? parseWithHtmlparser2(content, options)
: parseWithParse5(content, options, isDocument, context),
);
// Duplicate docs due to https://github.com/TypeStrong/typedoc/issues/1616
/**
* Create a querying function, bound to a document created from the provided
* markup.
*
* Note that similar to web browser contexts, this operation may introduce
* `<html>`, `<head>`, and `<body>` elements; set `isDocument` to `false` to
* switch to fragment mode and disable this.
* Sniffs the encoding of a buffer, then creates a querying function bound to a
* document created from the buffer.
*
* @category Loading
* @param content - Markup to be loaded.
* @param options - Options for the created instance.
* @param isDocument - Allows parser to be switched to fragment mode.
* @example
*
* ```js
* import * as cheerio from 'cheerio';
*
* const buffer = fs.readFileSync('index.html');
* const $ = cheerio.fromBuffer(buffer);
* ```
*
* @param buffer - The buffer to sniff the encoding of.
* @param options - The options to pass to Cheerio.
* @returns The loaded document.
* @see {@link https://cheerio.js.org#loading} for additional usage information.
*/
export const load: (
content: string | AnyNode | AnyNode[] | Buffer,
options?: CheerioOptions | null,
isDocument?: boolean,
) => CheerioAPI = getLoad(parse, (dom, options) =>
options._useHtmlParser2
? renderWithHtmlparser2(dom, options)
: renderWithParse5(dom),
);
export function loadBuffer(
buffer: Buffer,
options: DecodeStreamOptions = {},
): CheerioAPI {
const opts = flattenOptions(options);
const str = decodeBuffer(buffer, {
defaultEncoding: opts?.xmlMode ? 'utf8' : 'windows-1252',
...options.encoding,
});
const defaultInstance: CheerioAPI = load([]);
return load(str, opts);
}
/**
 * Builds the writable stream shared by `stringStream` and `decodeStream`.
 *
 * When `_useHtmlParser2` is set, chunks are fed to an htmlparser2 document
 * stream; otherwise a parse5 `ParserStream` is returned. In both cases `cb`
 * is invoked with a Cheerio instance that is loaded with the same options the
 * document was parsed with.
 *
 * @param options - Flattened Cheerio options. The object is not modified.
 * @param cb - Called once parsing has finished, with an error (if any) and
 *   the loaded document.
 * @returns A writable stream that accepts string chunks.
 */
function _stringStream(
  options: InternalOptions | undefined,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  if (options?._useHtmlParser2) {
    const parser = htmlparser2.createDocumentStream(
      // Forward the options so the instance keeps `xmlMode`, `baseURI`, etc.
      (err, document) => cb(err, load(document, options)),
      options,
    );
    return new Writable({
      // Keep chunks as strings instead of converting them to buffers.
      decodeStrings: false,
      write(chunk, _encoding, callback) {
        if (typeof chunk !== 'string') {
          throw new TypeError('Expected a string');
        }
        parser.write(chunk);
        callback();
      },
      final(callback) {
        parser.end();
        callback();
      },
    });
  }

  // Work on a copy so that the object handed in by the caller stays untouched.
  const parse5Options: InternalOptions = {
    ...options,
    treeAdapter: options?.treeAdapter ?? htmlparser2Adapter,
    // Scripting stays enabled unless it was explicitly set to `false`.
    scriptingEnabled: options?.scriptingEnabled !== false,
  };

  const stream = new Parse5Stream(parse5Options);
  finished(stream, (err) => cb(err, load(stream.document, parse5Options)));
  return stream;
}
/**
* The default cheerio instance.
* Creates a stream that parses a sequence of strings into a document.
*
* @deprecated Use the function returned by `load` instead. To access load, make
* sure you are importing `* as cheerio` instead of this default export.
* @category Deprecated
*/
export default defaultInstance;
/**
* Renders the document.
* The stream is a `Writable` stream that accepts strings. When the stream is
* finished, the callback is called with the loaded document.
*
* @deprecated Use `html` on the loaded instance instead.
* @category Deprecated
* @param dom - Element to render.
* @param options - Options for the renderer.
* @returns The rendered document.
*/
export const html: (
dom: BasicAcceptedElems<AnyNode>,
options?: CheerioOptions,
) => string = staticMethods.html.bind(defaultInstance);
/**
* Render the document as XML.
*
* @deprecated Use `xml` on the loaded instance instead.
* @category Deprecated
* @param dom - Element to render.
* @returns The rendered document.
*/
export const xml: (dom: BasicAcceptedElems<AnyNode>) => string =
staticMethods.xml.bind(defaultInstance);
/**
* Render the document as text.
*
* This returns the `textContent` of the passed elements. The result will
* include the contents of `<script>` and `<style>` elements. To avoid this, use
* `.prop('innerText')` instead.
*
* @deprecated Use `text` on the loaded instance instead.
* @category Deprecated
* @param elements - Elements to render.
* @returns The rendered document.
*/
export const text: (elements: ArrayLike<AnyNode>) => string =
staticMethods.text.bind(defaultInstance);
/**
* The `.parseHTML` method exported by the Cheerio module is deprecated.
*
* In order to promote consistency with the jQuery library, users are encouraged
* to instead use the static method of the same name as it is defined on the
* "loaded" Cheerio factory function.
*
* @deprecated Use `parseHTML` on the loaded instance instead.
* @category Deprecated
* @category Loading
* @example
*
* ```js
* const $ = cheerio.load('');
* $.parseHTML('<b>markup</b>');
* import * as cheerio from 'cheerio';
* import * as fs from 'fs';
*
* const writeStream = cheerio.stringStream({}, (err, $) => {
* if (err) {
* // Handle error
* }
*
* console.log($('h1').text());
* // Output: Hello, world!
* });
*
* fs.createReadStream('my-document.html', { encoding: 'utf8' }).pipe(
* writeStream,
* );
* ```
*
* @param options - The options to pass to Cheerio.
* @param cb - The callback to call when the stream is finished.
* @returns The writable stream.
*/
export const parseHTML = staticMethods.parseHTML.bind(
defaultInstance,
) as typeof staticMethods.parseHTML;
export function stringStream(
options: CheerioOptions,
cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
return _stringStream(flattenOptions(options), cb);
}
export interface DecodeStreamOptions extends CheerioOptions {
encoding?: SnifferOptions;
}
/**
* The `.root` method exported by the Cheerio module is deprecated.
* Parses a stream of buffers into a document.
*
* Users seeking to access the top-level element of a parsed document should
* instead use the `root` static method of a "loaded" Cheerio function.
* The stream is a `Writable` stream that accepts buffers. When the stream is
* finished, the callback is called with the loaded document.
*
* @deprecated Use `root` on the loaded instance instead.
* @category Deprecated
* @category Loading
* @param options - The options to pass to Cheerio.
* @param cb - The callback to call when the stream is finished.
* @returns The writable stream.
*/
export function decodeStream(
  options: DecodeStreamOptions,
  cb: (err: Error | null | undefined, $: CheerioAPI) => void,
): Writable {
  const { encoding = {}, ...cheerioOptions } = options;
  const opts = flattenOptions(cheerioOptions);
  /*
   * Build a fresh options object instead of assigning to `encoding`: writing
   * the fallback into a caller-owned object would make it stick for later
   * calls that reuse the object with a different `xmlMode`.
   */
  const snifferOptions: SnifferOptions = {
    ...encoding,
    // Set the default encoding to UTF-8 for XML mode
    defaultEncoding:
      encoding.defaultEncoding ?? (opts?.xmlMode ? 'utf8' : 'windows-1252'),
  };
  // Decoded strings are piped into the string parser; callers write buffers
  // to the returned decoder.
  const decoder = new DecodeStream(snifferOptions);
  const loadStream = _stringStream(opts, cb);
  decoder.pipe(loadStream);
  return decoder;
}
type UndiciStreamOptions = Parameters<typeof undici.stream>[1];
export interface CheerioRequestOptions extends DecodeStreamOptions {
/** The options passed to `undici`'s `stream` method. */
requestOptions?: UndiciStreamOptions;
}
const defaultRequestOptions: UndiciStreamOptions = {
method: 'GET',
// Allow redirects by default
maxRedirections: 5,
// NOTE: `throwOnError` currently doesn't work https://github.com/nodejs/undici/issues/1753
throwOnError: true,
// Set an Accept header
headers: {
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
},
};
/**
* `fromURL` loads a document from a URL.
*
* By default, redirects are allowed and non-2xx responses are rejected.
*
* @category Loading
* @example
*
* ```js
* const $ = cheerio.load('');
* $.root();
* import * as cheerio from 'cheerio';
*
* const $ = await cheerio.fromURL('https://example.com');
* ```
*
* @param url - The URL to load the document from.
* @param options - The options to pass to Cheerio.
* @returns The loaded document.
*/
export const root = staticMethods.root.bind(
defaultInstance,
) as typeof staticMethods.root;
export async function fromURL(
  url: string | URL,
  options: CheerioRequestOptions = {},
): Promise<CheerioAPI> {
  const {
    requestOptions = defaultRequestOptions,
    encoding = {},
    ...cheerioOptions
  } = options;
  let undiciStream: Promise<undici.Dispatcher.StreamData> | undefined;

  // Add headers if none were supplied, without touching the caller's object.
  const streamOptions: UndiciStreamOptions = {
    ...requestOptions,
    headers: requestOptions.headers ?? defaultRequestOptions.headers,
  };

  const promise = new Promise<CheerioAPI>((resolve, reject) => {
    undiciStream = undici.stream(url, streamOptions, (res) => {
      const contentType = res.headers['content-type'] ?? 'text/html';
      const mimeType = new MIMEType(
        Array.isArray(contentType) ? contentType[0] : contentType,
      );

      // Only HTML and XML responses can be loaded.
      if (!mimeType.isHTML() && !mimeType.isXML()) {
        throw new RangeError(
          `The content-type "${contentType}" is neither HTML nor XML.`,
        );
      }

      /*
       * If we allow redirects, we will have entries in the history.
       * The last entry will be the final URL.
       */
      const history = (
        res.context as
          | {
              history?: URL[];
            }
          | undefined
      )?.history;

      // Typed explicitly so that a misspelt option name is a compile error.
      const opts: DecodeStreamOptions = {
        // Forward the charset from the header to the decodeStream.
        encoding: {
          ...encoding,
          transportLayerEncodingLabel: mimeType.parameters.get('charset'),
        },
        // Set XML mode based on the MIME type.
        xmlMode: mimeType.isXML(),
        // Set the `baseURI` to the final URL.
        baseURI: history ? history[history.length - 1] : url,
        // Options supplied by the caller override the derived values above.
        ...cheerioOptions,
      };

      return decodeStream(opts, (err, $) => (err ? reject(err) : resolve($)));
    });
  });

  // Let's make sure the request is completed before returning the promise.
  await undiciStream;
  return promise;
}

View File

@@ -1,22 +1,23 @@
import cheerio from './index.js';
import { describe, it, expect } from 'vitest';
import { load } from './index.js';
describe('.load', () => {
it('(html) : should retain original root after creating a new node', () => {
const $ = cheerio.load('<body><ul id="fruits"></ul></body>');
const $ = load('<body><ul id="fruits"></ul></body>');
expect($('body')).toHaveLength(1);
$('<script>');
expect($('body')).toHaveLength(1);
});
it('(html) : should handle lowercase tag options', () => {
const $ = cheerio.load('<BODY><ul id="fruits"></ul></BODY>', {
const $ = load('<BODY><ul id="fruits"></ul></BODY>', {
xml: { lowerCaseTags: true },
});
expect($.html()).toBe('<body><ul id="fruits"/></body>');
});
it('(html) : should handle xml tag option', () => {
const $ = cheerio.load('<body><script><foo></script></body>', {
const $ = load('<body><script><foo></script></body>', {
xml: true,
});
expect($('script')[0].children[0].type).toBe('tag');
@@ -25,7 +26,7 @@ describe('.load', () => {
it('(buffer) : should accept a buffer', () => {
const html = '<html><head></head><body>foo</body></html>';
// eslint-disable-next-line n/no-unsupported-features/node-builtins
const $html = cheerio.load(Buffer.from(html));
const $html = load(Buffer.from(html));
expect($html.html()).toBe(html);
});
});

View File

@@ -1,3 +1,4 @@
import { describe, it, expect } from 'vitest';
import type { Document, Element } from 'domhandler';
import { getParse } from './parse.js';

View File

@@ -1,3 +1,4 @@
import { describe, it, expect, beforeEach } from 'vitest';
import * as fixtures from './__fixtures__/fixtures.js';
import cheerio, { type CheerioAPI } from './index.js';

View File

@@ -1,5 +1,6 @@
import type { BasicAcceptedElems } from './types.js';
import type { CheerioAPI, Cheerio } from './index.js';
import type { CheerioAPI } from './load.js';
import type { Cheerio } from './cheerio.js';
import type { AnyNode, Document } from 'domhandler';
import { textContent } from 'domutils';
import {

View File

@@ -1,3 +1,4 @@
import { describe, it, expect } from 'vitest';
import * as utils from './utils.js';
describe('util functions', () => {

View File

@@ -1,5 +0,0 @@
{
"extends": "./tsconfig.json",
"include": ["src", "benchmark", "scripts"],
"exclude": []
}

View File

@@ -1,16 +1,13 @@
{
"compilerOptions": {
/* Basic Options */
"target": "es5",
"target": "es2019",
"module": "node16",
"lib": ["ES2015.Core"],
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"outDir": "lib",
// esModuleInterop is implied by `--module node16`,
// but ts-jest doesn't recognize that and needs it listed.
"esModuleInterop": true,
/* Strict Type-Checking Options */
"strict": true,
@@ -25,12 +22,5 @@
"noPropertyAccessFromIndexSignature": true,
"noUnusedLocals": true,
"noUnusedParameters": true
},
"include": ["src"],
"exclude": [
"**/*.spec.ts",
"**/__fixtures__/*",
"**/__tests__/*",
"**/__snapshots__/*"
]
}
}

15
vitest.config.ts Normal file
View File

@@ -0,0 +1,15 @@
import { defineConfig } from 'vitest/config';
// Glob patterns that are left out of the coverage report.
const coverageExclude = [
  'benchmark/**',
  'scripts/**',
  'website/**',
  'dist/**',
  '*.config.ts',
];

// Vitest configuration; only the coverage exclusions are customised.
export default defineConfig({
  test: {
    coverage: {
      exclude: coverageExclude,
    },
  },
});

View File

@@ -6,5 +6,17 @@
},
"rules": {
"n/no-missing-require": 0
}
},
"overrides": [
{
"files": ["*.config.js"],
"globals": {
"require": "readonly",
"module": "readonly"
},
"parserOptions": {
"sourceType": "script"
}
}
]
}

View File

@@ -144,7 +144,7 @@ You can also use Cheerio's _slim_ export, which always uses `htmlparser2`. This
avoids loading `parse5`, which saves some bytes, e.g. in browser environments:
```js
import * as cheerio from 'cheerio/lib/slim';
import * as cheerio from 'cheerio/slim';
```
:::

View File

@@ -18,21 +18,9 @@ document.
:::danger Availability of methods
Only the `load` method is available by default. If you are using current NodeJS
version, other methods will be available if you use an ES Module imports (your
file has a `.mjs` extension, or you have `type: "module"` in your
`package.json`). You can also directly import or require
`cheerio/lib/batteries`.
<details>
<summary>Why is that the case?</summary>
The methods that are not available by default have dependencies that use the
[`node:` protocol](https://nodejs.org/api/esm.html#node-imports). This protocol
is only supported for ES Modules in Node 12. To maintain compatibility with Node
12, we only provide methods besides `load` when you use an ES Module import.
</details>
The `loadBuffer`, `stringStream`, `decodeStream`, and `fromURL` methods are not
available in the browser environment. Instead, use the `load` method to parse
HTML strings.
:::

View File

@@ -230,7 +230,7 @@ const config = {
'docusaurus-plugin-typedoc',
{
// TypeDoc options
entryPoints: ['../src/batteries.ts'],
entryPoints: ['../src/index.ts'],
tsconfig: '../tsconfig.json',
readme: 'none',
excludePrivate: true,

View File

@@ -1,5 +1,5 @@
import React from 'react';
import * as cheerio from '../../../../lib/index.js';
import * as cheerio from '../../../../dist/index.js';
// Add react-live imports you need here
const ReactLiveScope = {