parseDSL.ts

import { isObject } from "utils/object";
import { isArray } from "utils/array";
import type {
  HtmlTag,
  HtmlNode,
  HtmlTagNode,
  PageSyntax,
  HtmlAttributes,
} from "../types/html";

/**
 * Type guard: does `attrs` look like a set of HTML attributes?
 *
 * The decision is delegated entirely to `isObject`; individual attribute
 * names and values are not inspected.
 *
 * @param attrs - Value to test.
 * @returns The result of `isObject(attrs)`.
 */
const isHtmlAttributes = (attrs: any): attrs is HtmlAttributes =>
  isObject(attrs);

// Utilities for parsing the HTML DSL internal to this project.

/**
 * Read the name of a tag node, i.e. its first element.
 *
 * @param tag - Tag node to read from.
 * @returns The value stored in the first slot of `tag`.
 */
const tagName = (tag: HtmlTagNode) => tag[0];

/**
 * Read the attributes object of a tag node, i.e. its second element.
 *
 * @param tag - Tag node to read from.
 * @returns The second slot of `tag` when `isHtmlAttributes` accepts it,
 *   otherwise `undefined`.
 */
const tagAttributes = (tag: HtmlTagNode): HtmlAttributes | undefined => {
  const candidate = tag[1];
  if (isHtmlAttributes(candidate)) {
    return candidate;
  }
  return undefined;
};

/**
 * Gather every element of an HTML page that satisfies `predicate`.
 *
 * The search is depth-first and stops descending at a match: when a node
 * satisfies the predicate it is returned as-is and its contents are not
 * searched for further matches.
 *
 * @param htmlPage - Page (or sub-tree) to search.
 * @param predicate - Test applied to each visited node.
 * @returns The matching nodes, in traversal order; empty when nothing matches.
 */
const collectElements = (
  htmlPage: PageSyntax,
  predicate: (node: HtmlNode) => boolean
): HtmlNode[] => {
  // A matching node is reported on its own; nested matches are not collected.
  if (predicate(htmlPage)) {
    return [htmlPage];
  }

  // Anything that is not an array has no contents to search.
  if (!isArray(htmlPage)) {
    return [];
  }

  // Search each entry of the array and append its matches in order.
  const found: HtmlNode[] = [];
  htmlPage.forEach((child) => {
    for (const hit of collectElements(child as HtmlNode, predicate)) {
      found.push(hit);
    }
  });
  return found;
};

/**
 * Search an html page for elements whose tag name is one of `tags`.
 *
 * @param htmlPage - Page to search.
 * @param tags - Tag names to look for.
 * @returns The matching nodes, as gathered by `collectElements`.
 */
const findTags = (htmlPage: PageSyntax, tags: HtmlTag[]) => {
  const hasWantedName = (node: HtmlNode): boolean => {
    if (!isHtmlTagNode(node)) {
      return false;
    }
    const name = node[0];
    // A missing or empty name never matches.
    return name ? tags.includes(name) : false;
  };

  return collectElements(htmlPage, hasWantedName);
};

/**
 * Type guard: is `v` a tag node?
 *
 * The check is delegated entirely to `isArray`; the elements of `v` are not
 * inspected.
 *
 * @param v - Value to test.
 * @returns The result of `isArray(v)`.
 */
const isHtmlTagNode = (v: any): v is HtmlTagNode => isArray(v);

/**
 * Get the link embedded in an HTML tag node.
 *
 * - `a`, `href` and `link` tags yield their `href` attribute.
 * - `img` and `script` tags yield their `src` attribute.
 *
 * @param tag - Node to inspect.
 * @returns The attribute value, or `undefined` when `tag` is not a tag node
 *   (per `isHtmlTagNode`), has no attributes object, or lacks the attribute.
 * @throws Error when `tag` is a tag node whose name is not one of the
 *   link-bearing tags listed above.
 */
const getTagLink = (tag: HtmlNode): string | undefined => {
  if (!isHtmlTagNode(tag)) {
    return undefined;
  }

  const name = tagName(tag);
  // NOTE: some `a` tags have been observed with null attrs (cause unknown).
  // `tagAttributes` may also yield undefined, so every lookup below uses
  // optional chaining.
  const attrs = tagAttributes(tag);

  if (["a", "href", "link"].includes(name)) {
    return attrs?.href;
  }
  if (["img", "script"].includes(name)) {
    return attrs?.src;
  }

  // Serialize with JSON so the attributes object is readable in the message
  // instead of collapsing to "[object Object]".
  throw new Error(
    `No link found in tag, invalid input to getTagLink: ${JSON.stringify(tag)}`
  );
};

export { isHtmlAttributes, findTags, getTagLink };

Date	Hash
2024-04-13	0c9b2c0e
2024-04-12	cd584b50
2024-04-12	d50b0fa3
2024-04-01	a3c37e13
2024-04-01	3baa0660
2024-04-01	69a0562b
2024-04-01	110e0165
2024-03-31	216698d1
2024-03-31	3a28fba7
2024-03-31	b53e2351
2024-03-31	c64fbf31
2024-03-31	02dca1b8
2024-03-31	2214e8fd
2024-03-31	3e22c4b6
2024-03-31	796f78c6
2024-03-31	9a544393

Next	builder.ts
Up	html