Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request from GHSA-h6q6-9hqw-rwfv
* fix!: Preserve quotes in DOCTYPE declaration

Since the only purpose of parsing the DOCTYPE is to be able to restore it when serializing,
we decided that it would be best to leave the parsed publicId and systemId as is,
including any quotes.

BREAKING CHANGE: If somebody relies on the actual unquoted values of those ids,
they will need to take care of either single or double quotes and the right escaping.

(Without this change this would not have been possible because the SAX parser already
dropped the information about the quotes that have been used in the source.)

https://www.w3.org/TR/2006/REC-xml11-20060816/#dtd
https://www.w3.org/TR/2006/REC-xml11-20060816/#IDAX1KS (External Entity Declaration)

Co-authored-by: Christian Bewernitz <coder@karfau.de>
Co-authored-by: Chris Brody <chris.brody+brodybits@gmail.com>

* feat(security): Improve error reporting; throw on duplicate attribute

BREAKING CHANGE: It is currently not clear how to consistently deal with duplicate attributes,
so it is also safer for our users to fail when detecting them.

It is possible to configure the `DOMParser.errorHandler` before parsing, to handle those errors differently.

To accomplish this and also be able to verify it in tests we needed to:

- create a new `Error` type `ParseError` and export it
- Throw `ParseError` from `errorHandler.fatalError` and prevent those from being caught in `XMLReader`.
- export `DOMHandler` constructor as `__DOMHandler`

Co-authored-by: Christian Bewernitz <coder@karfau.de>
Co-authored-by: Chris Brody <chris.brody+brodybits@gmail.com>

Co-authored-by: Christian Bewernitz <coder@karfau.de>
  • Loading branch information
brodybits and karfau committed Mar 9, 2021
1 parent a4d717c commit d4201b9
Show file tree
Hide file tree
Showing 14 changed files with 1,190 additions and 134 deletions.
8 changes: 5 additions & 3 deletions lib/dom-parser.js
Expand Up @@ -178,8 +178,7 @@ DOMHandler.prototype = {
console.error('[xmldom error]\t'+error,_locator(this.locator));
},
fatalError:function(error) {
console.error('[xmldom fatalError]\t'+error,_locator(this.locator));
throw error;
throw new ParseError(error, this.locator);
}
}
function _locator(l){
Expand Down Expand Up @@ -244,8 +243,11 @@ function appendElement (hander,node) {

//if(typeof require == 'function'){
var htmlEntity = require('./entities');
var XMLReader = require('./sax').XMLReader;
var sax = require('./sax');
var XMLReader = sax.XMLReader;
var ParseError = sax.ParseError;
var DOMImplementation = exports.DOMImplementation = require('./dom').DOMImplementation;
exports.XMLSerializer = require('./dom').XMLSerializer ;
exports.DOMParser = DOMParser;
exports.__DOMHandler = DOMHandler;
//}
8 changes: 4 additions & 4 deletions lib/dom.js
Expand Up @@ -1094,13 +1094,13 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){
var sysid = node.systemId;
buf.push('<!DOCTYPE ',node.name);
if(pubid){
buf.push(' PUBLIC "',pubid);
buf.push(' PUBLIC ', pubid);
if (sysid && sysid!='.') {
buf.push( '" "',sysid);
buf.push(' ', sysid);
}
buf.push('">');
buf.push('>');
}else if(sysid && sysid!='.'){
buf.push(' SYSTEM "',sysid,'">');
buf.push(' SYSTEM ', sysid, '>');
}else{
var sub = node.internalSubset;
if(sub){
Expand Down
68 changes: 45 additions & 23 deletions lib/sax.js
Expand Up @@ -18,6 +18,21 @@ var S_ATTR_END = 5;//attr value end and no space(quot end)
var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer)
var S_TAG_CLOSE = 7;//closed el<el />

/**
* Creates an error that will not be caught by XMLReader aka the SAX parser.
*
* @param {string} message
* @param {any?} locator Optional, can provide details about the location in the source
* @constructor
*/
function ParseError(message, locator) {
this.message = message
this.locator = locator
if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
}
ParseError.prototype = new Error();
ParseError.prototype.name = ParseError.name

function XMLReader(){

}
Expand Down Expand Up @@ -126,7 +141,7 @@ function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
}
}
if(!endMatch){
errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName );
errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
}
}else{
parseStack.push(config)
Expand Down Expand Up @@ -187,10 +202,11 @@ function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
}
}
}catch(e){
if (e instanceof ParseError) {
throw e;
}
errorHandler.error('element parse error: '+e)
//errorHandler.error('element parse error: '+e);
end = -1;
//throw e;
}
if(end>start){
start = end;
Expand All @@ -211,6 +227,16 @@ function copyLocator(f,t){
* @return end of the elementStartPart(end of elementEndPart for selfClosed el)
*/
function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,errorHandler){

/**
* @param {string} qname
* @param {string} value
* @param {number} startIndex
*/
function addAttribute(qname, value, startIndex) {
if (qname in el.attributeNames) errorHandler.fatalError('Attribute ' + qname + ' redefined')
el.addValue(qname, value, startIndex)
}
var attrName;
var value;
var p = ++start;
Expand All @@ -226,7 +252,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
s = S_EQ;
}else{
//fatalError: equal must after attrName or space after attrName
throw new Error('attribute equal must after attrName');
throw new Error('attribute equal must after attrName'); // No known test case
}
break;
case '\'':
Expand All @@ -241,7 +267,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
p = source.indexOf(c,start)
if(p>0){
value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
el.add(attrName,value,start-1);
addAttribute(attrName, value, start-1);
s = S_ATTR_END;
}else{
//fatalError: no end quot match
Expand All @@ -250,14 +276,14 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
}else if(s == S_ATTR_NOQUOT_VALUE){
value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
//console.log(attrName,value,start,p)
el.add(attrName,value,start);
addAttribute(attrName, value, start);
//console.dir(el)
errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
start = p+1;
s = S_ATTR_END
}else{
//fatalError: no equal before
throw new Error('attribute value must after "="');
throw new Error('attribute value must after "="'); // No known test case
}
break;
case '/':
Expand All @@ -275,11 +301,10 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
break;
//case S_EQ:
default:
throw new Error("attribute invalid close char('/')")
throw new Error("attribute invalid close char('/')") // No known test case
}
break;
case ''://end document
//throw new Error('unexpected end of input')
errorHandler.error('unexpected end of input');
if(s == S_TAG){
el.setTagName(source.slice(start,p));
Expand All @@ -305,13 +330,13 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
value = attrName;
}
if(s == S_ATTR_NOQUOT_VALUE){
errorHandler.warning('attribute "'+value+'" missed quot(")!!');
el.add(attrName,value.replace(/&#?\w+;/g,entityReplacer),start)
errorHandler.warning('attribute "'+value+'" missed quot(")!');
addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start)
}else{
if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !value.match(/^(?:disabled|checked|selected)$/i)){
errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
}
el.add(value,value,start)
addAttribute(value, value, start)
}
break;
case S_EQ:
Expand All @@ -336,7 +361,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
case S_ATTR_NOQUOT_VALUE:
var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
errorHandler.warning('attribute "'+value+'" missed quot(")!!');
el.add(attrName,value,start)
addAttribute(attrName, value, start)
case S_ATTR_END:
s = S_TAG_SPACE;
break;
Expand All @@ -359,7 +384,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
if(currentNSMap[''] !== 'http://www.w3.org/1999/xhtml' || !attrName.match(/^(?:disabled|checked|selected)$/i)){
errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
}
el.add(attrName,attrName,start);
addAttribute(attrName, attrName, start);
start = p;
s = S_ATTR;
break;
Expand Down Expand Up @@ -542,8 +567,7 @@ function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
}
}
var lastMatch = matchs[len-1]
domBuilder.startDTD(name,pubid && pubid.replace(/^(['"])(.*?)\1$/,'$2'),
sysid && sysid.replace(/^(['"])(.*?)\1$/,'$2'));
domBuilder.startDTD(name, pubid, sysid);
domBuilder.endDTD();

return lastMatch.index+lastMatch[0].length
Expand All @@ -569,11 +593,8 @@ function parseInstruction(source,start,domBuilder){
return -1;
}

/**
* @param source
*/
function ElementAttributes(source){

function ElementAttributes(){
this.attributeNames = {}
}
ElementAttributes.prototype = {
setTagName:function(tagName){
Expand All @@ -582,10 +603,11 @@ ElementAttributes.prototype = {
}
this.tagName = tagName
},
add:function(qName,value,offset){
addValue:function(qName, value, offset) {
if(!tagNamePattern.test(qName)){
throw new Error('invalid attribute:'+qName)
}
this.attributeNames[qName] = this.length;
this[this.length++] = {qName:qName,value:value,offset:offset}
},
length:0,
Expand Down Expand Up @@ -621,4 +643,4 @@ function split(source,start){
}

exports.XMLReader = XMLReader;

exports.ParseError = ParseError;

0 comments on commit d4201b9

Please sign in to comment.