Unverified Commit 14a349ff authored by Hong Minhee's avatar Hong Minhee
Browse files

Recognize `alternate` AS objects in <link>/<a>

parent fb33da0e
Loading
Loading
Loading
Loading
+11 −2
Original line number Diff line number Diff line
@@ -10,12 +10,21 @@ To be released.

 -  Removed `expand` option of `Object.toJsonLd()` method, which was deprecated
    in version 0.14.0.  Use `format: "expand"` option instead.

 -  Added `Context.lookupObject()` method.
 -  Default document loaders now recognize `alternate` ActivityStreams objects
    in the `Link` header.

 -  Default document loaders now recognize ActivityStreams objects in more ways:

     -  Loaders now recognize `alternate` ActivityStreams objects in the `Link`
        header.
     -  Loaders now recognize `alternate` ActivityStreams objects in
        the `<link>`/`<a>` HTML elements.

 -  Added `allowPrivateAddress` option to `CreateFederationOptions` interface.

 -  Renamed the short option `-c` for `--compact` of `fedify lookup` command to
    `-C` to avoid conflict with the short option `-c` for `--cache-dir`.

 -  Added `-r`/`--raw` option to `fedify lookup` command to output the raw JSON
    object.

+62 −0
Original line number Diff line number Diff line
@@ -106,6 +106,68 @@ test("fetchDocumentLoader()", async (t) => {
    });
  });

  // Registers a mocked `GET /html-link` response: an HTML page (served as
  // `text/html; charset=utf-8`) whose <head> holds a <link rel=alternate>
  // pointing at https://example.com/object with the ActivityStreams media
  // type.  The three attributes deliberately use the three HTML value
  // syntaxes: unquoted (rel), single-quoted (type), and double-quoted (href).
  mf.mock("GET@/html-link", (_req) =>
    new Response(
      `<html>
        <head>
          <meta charset=utf-8>
          <link
            rel=alternate
            type='application/activity+json'
            href="https://example.com/object">
        </head>
      </html>`,
      {
        status: 200,
        headers: { "Content-Type": "text/html; charset=utf-8" },
      },
    ));

  // Loading the HTML page must yield the alternate document rather than the
  // HTML itself: `documentUrl` is the <link>'s href, not the requested URL.
  // NOTE(review): the expected body comes from a mock for /object that is
  // presumably registered earlier in this test -- it is not visible here.
  await t.step("HTML <link>", async () => {
    assertEquals(await fetchDocumentLoader("https://example.com/html-link"), {
      contextUrl: null,
      documentUrl: "https://example.com/object",
      document: {
        "@context": "https://www.w3.org/ns/activitystreams",
        id: "https://example.com/object",
        name: "Fetched object",
        type: "Object",
      },
    });
  });

  // Registers a mocked `GET /html-a` response: an HTML page (served as
  // `text/html; charset=utf-8`) whose <body> holds an <a rel=alternate>
  // anchor pointing at https://example.com/object.  All three attribute
  // values are unquoted here, so this covers the unquoted-value syntax for
  // `type` and `href` as well.
  mf.mock("GET@/html-a", (_req) =>
    new Response(
      `<html>
        <head>
          <meta charset=utf-8>
        </head>
        <body>
          <a
            rel=alternate
            type=application/activity+json
            href=https://example.com/object>test</a>
        </body>
      </html>`,
      {
        status: 200,
        headers: { "Content-Type": "text/html; charset=utf-8" },
      },
    ));

  // Same expectation as for <link>, but the alternate is advertised through
  // an <a> element: the loader must return the document at the anchor's
  // href, with `documentUrl` set to that href.
  // NOTE(review): the expected body comes from a mock for /object that is
  // presumably registered earlier in this test -- it is not visible here.
  await t.step("HTML <a>", async () => {
    assertEquals(await fetchDocumentLoader("https://example.com/html-a"), {
      contextUrl: null,
      documentUrl: "https://example.com/object",
      document: {
        "@context": "https://www.w3.org/ns/activitystreams",
        id: "https://example.com/object",
        name: "Fetched object",
        type: "Object",
      },
    });
  });

  // Registers a mocked `GET /404` endpoint that always answers with an empty
  // body and HTTP status 404.
  mf.mock("GET@/404", (_req) => new Response("", { status: 404 }));

  await t.step("not ok", async () => {
+35 −0
Original line number Diff line number Diff line
@@ -146,6 +146,41 @@ async function getRemoteDocument(
      }
    }
  }
  // HTML/XHTML fallback: runs only when `jsonLd` is falsy and the response's
  // content type is HTML or XHTML.  The markup is scanned for <a>/<link>
  // elements that advertise an ActivityStreams (or JSON-LD) alternate
  // representation; the first such element wins and its target is loaded
  // through `fetch` in place of the HTML page.
  if (
    !jsonLd &&
    (contentType === "text/html" || contentType?.startsWith("text/html;") ||
      contentType === "application/xhtml+xml" ||
      contentType?.startsWith("application/xhtml+xml;"))
  ) {
    // Matches an <a> or <link> start tag whose attributes all have the
    // name=value form; capture group 2 is the raw attribute list.
    const p = /<(a|link)((\s+[a-z][a-z:_-]*=("[^"]*"|'[^']*'|[^\s>]+))+)\/?>/ig;
    // Matches one attribute; the value lands in group 3 (double-quoted),
    // group 4 (single-quoted), or group 5 (unquoted).
    const p2 = /\s+([a-z][a-z:_-]*)=("([^"]*)"|'([^']*)'|([^\s>]+))/ig;
    const html = await response.text();
    let m: RegExpExecArray | null;
    const rawAttribs: string[] = [];
    while ((m = p.exec(html)) !== null) rawAttribs.push(m[2]);
    for (const rawAttrs of rawAttribs) {
      let m2: RegExpExecArray | null;
      const attribs: Record<string, string> = {};
      // `p2` is a global regex: the loop runs until exec() returns null,
      // which also resets lastIndex for the next attribute list.
      while ((m2 = p2.exec(rawAttrs)) !== null) {
        const key = m2[1].toLowerCase();
        const value = m2[3] ?? m2[4] ?? m2[5] ?? "";
        attribs[key] = value;
      }
      // `rel` is a space-separated, ASCII-case-insensitive token list in
      // HTML, so `rel="alternate nofollow"` and `rel="Alternate"` must match
      // as well as the bare `alternate`.
      const relTokens = (attribs.rel ?? "").toLowerCase().split(/\s+/);
      if (
        relTokens.includes("alternate") && "type" in attribs && (
          attribs.type === "application/activity+json" ||
          attribs.type === "application/ld+json" ||
          attribs.type.startsWith("application/ld+json;")
        ) && "href" in attribs
      ) {
        // Resolve the href against the document URL so that relative and
        // protocol-relative links (e.g. href="/users/alice") work too.  If
        // either URL cannot be parsed, fall back to the raw attribute value,
        // which is what was passed to `fetch` before resolution was added.
        let alternateUrl = attribs.href;
        try {
          alternateUrl = new URL(attribs.href, documentUrl).href;
        } catch {
          // Keep the raw href; `fetch` decides how to treat it.
        }
        logger.debug(
          "Found alternate document: {alternateUrl} from {url}",
          { alternateUrl, url: documentUrl },
        );
        return await fetch(alternateUrl);
      }
    }
  }
  logger.debug(
    "Fetched document: {status} {url} {headers}",
    {