I created a custom import function that overcomes all limits of IMPORTXML I have a sheet using this in about 800 cells and it works great.
It makes use of Google Sheet’s custom scripts (Tools > Script editor…) and searches through content using regex instead of xpath.
function importRegex(url, regexInput) {
var output = '';
var fetchedUrl = UrlFetchApp.fetch(url, {muteHttpExceptions: true});
if (fetchedUrl) {
var html = fetchedUrl.getContentText();
if (html.length && regexInput.length) {
output = html.match(new RegExp(regexInput, 'i'))[1];
}
}
// Grace period to not overload
Utilities.sleep(1000);
return output;
}
You can then use this function like any function.
=importRegex("https://example.com", "<title>(.*)</title>")
Of course, you can also reference cells.
=importRegex(A2, "<title>(.*)</title>")
If you don’t want to see HTML entities in the output, you can use this function.
var htmlEntities = {
nbsp: ' ',
cent: '¢',
pound: '£',
yen: '¥',
euro: '€',
copy: '?',
reg: '?',
lt: '<',
gt: '>',
mdash: '–',
ndash: '-',
quot: '"',
amp: '&',
apos: '''
};
function unescapeHTML(str) {
return str.replace(/&([^;]+);/g, function (entity, entityCode) {
var match;
if (entityCode in htmlEntities) {
return htmlEntities[entityCode];
} else if (match = entityCode.match(/^#x([da-fA-F]+)$/)) {
return String.fromCharCode(parseInt(match[1], 16));
} else if (match = entityCode.match(/^#(d+)$/)) {
return String.fromCharCode(~~match[1]);
} else {
return entity;
}
});
};
All together…
function importRegex(url, regexInput) {
var output = '';
var fetchedUrl = UrlFetchApp.fetch(url, {muteHttpExceptions: true});
if (fetchedUrl) {
var html = fetchedUrl.getContentText();
if (html.length && regexInput.length) {
output = html.match(new RegExp(regexInput, 'i'))[1];
}
}
// Grace period to not overload
Utilities.sleep(1000);
return unescapeHTML(output);
}
var htmlEntities = {
nbsp: ' ',
cent: '¢',
pound: '£',
yen: '¥',
euro: '€',
copy: '?',
reg: '?',
lt: '<',
gt: '>',
mdash: '–',
ndash: '-',
quot: '"',
amp: '&',
apos: '''
};
function unescapeHTML(str) {
return str.replace(/&([^;]+);/g, function (entity, entityCode) {
var match;
if (entityCode in htmlEntities) {
return htmlEntities[entityCode];
} else if (match = entityCode.match(/^#x([da-fA-F]+)$/)) {
return String.fromCharCode(parseInt(match[1], 16));
} else if (match = entityCode.match(/^#(d+)$/)) {
return String.fromCharCode(~~match[1]);
} else {
return entity;
}
});
};