import { isObject } from "utils/object";
import { isArray } from "utils/array";
import type {
  HtmlTag,
  HtmlNode,
  HtmlTagNode,
  PageSyntax,
  HtmlAttributes,
} from "../types/html";

// Utilities for parsing the HTML DSL internal to this project.

/**
 * Tag names whose link is read from the `href` attribute.
 * NOTE(review): "href" is not a standard HTML tag name; it is kept so that
 * existing inputs keep resolving exactly as before — confirm whether the DSL
 * actually produces it.
 */
const HREF_TAGS: readonly string[] = ["a", "href", "link"];

/** Tag names whose link is read from the `src` attribute. */
const SRC_TAGS: readonly string[] = ["img", "script"];

/**
 * Is `attrs` a set of HTML attributes?
 *
 * This only delegates to `isObject`; individual attribute keys and values are
 * not validated.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- guard accepts arbitrary input
const isHtmlAttributes = (attrs: any): attrs is HtmlAttributes => {
  return isObject(attrs);
};

/**
 * Is `v` a tag node of the HTML DSL?
 *
 * This only delegates to `isArray`; the contents of the array are not checked.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- guard accepts arbitrary input
const isHtmlTagNode = (v: any): v is HtmlTagNode => {
  return isArray(v);
};

/**
 * Get the name of a tag (the first element of the tag node).
 */
const tagName = ([name]: HtmlTagNode) => name;

/**
 * Get the attributes object of a tag (the second element of the tag node).
 *
 * @returns the attributes when the second element passes `isHtmlAttributes`,
 *   otherwise `undefined`.
 */
const tagAttributes = ([, attributes]: HtmlTagNode):
  | HtmlAttributes
  | undefined => {
  return isHtmlAttributes(attributes) ? attributes : undefined;
};

/**
 * Collect elements from an HTML page that meet the provided predicate.
 *
 * A node that satisfies the predicate is returned on its own and its contents
 * are NOT searched, so only the outermost match on each branch is reported.
 * Otherwise, when the node is an array, every element of it is searched
 * recursively. Anything else yields no results.
 *
 * @param htmlPage - the page (or sub-node) to search.
 * @param predicate - test applied to each visited node.
 * @returns the matching nodes, in traversal order.
 */
const collectElements = (
  htmlPage: PageSyntax,
  predicate: (node: HtmlNode) => boolean
): HtmlNode[] => {
  // We only look for roots: a match ends the descent on this branch.
  if (predicate(htmlPage)) {
    return [htmlPage];
  }

  if (isArray(htmlPage)) {
    // Not a match itself, but it has contents we can search.
    return htmlPage.flatMap((child) =>
      collectElements(child as HtmlNode, predicate)
    );
  }

  return [];
};

/**
 * Find html elements with the given tag names on an html page.
 *
 * @param htmlPage - the page to search.
 * @param tags - tag names to look for.
 * @returns every outermost tag node whose (truthy) name is listed in `tags`.
 */
const findTags = (htmlPage: PageSyntax, tags: HtmlTag[]) => {
  return collectElements(htmlPage, (node: HtmlNode): boolean => {
    if (!isHtmlTagNode(node)) {
      return false;
    }
    const name = tagName(node);
    // A missing/empty name never matches, even if `tags` were to contain it.
    return Boolean(name) && tags.includes(name);
  });
};

/**
 * Get the link embedded in an HTML tag:
 * the `href` attribute of an `a` or `link` tag,
 * or the `src` attribute of an `img` or `script` tag.
 *
 * @param tag - the node to inspect.
 * @returns the link; `undefined` when `tag` is not a tag node or when the
 *   relevant attribute is absent.
 * @throws Error when `tag` is a tag node whose name is not one of the
 *   link-carrying tags above.
 */
const getTagLink = (tag: HtmlNode): string | undefined => {
  if (!isHtmlTagNode(tag)) {
    return undefined;
  }

  const name = tagName(tag);
  // NOTE: There is a bug here where some attrs to an a tag are null. Not sure why.
  // The optional chaining below tolerates both `null` and `undefined` attrs.
  const attrs = tagAttributes(tag);

  if (HREF_TAGS.includes(name)) {
    return attrs?.href;
  }
  if (SRC_TAGS.includes(name)) {
    return attrs?.src;
  }

  throw new Error(
    `No link found in tag, invalid input to getTagLink: ${tag.toString()}`
  );
};

export { isHtmlAttributes, findTags, getTagLink };