Unverified Commit 2bdcb24d authored by Hong Minhee's avatar Hong Minhee
Browse files

Fix ReDoS vulnerability in HTML parsing (CVE-2025-68475)

The document loader's HTML parsing regex contained nested quantifiers
that caused catastrophic backtracking when processing maliciously
crafted HTML responses.  An attacker-controlled server could respond
with a small (~170 bytes) payload that blocked the event loop for 14+
seconds.

Changes:

- Replace vulnerable regex with safe patterns without nested quantifiers
- Add 1MB HTML response size limit as additional mitigation
- Add regression test for ReDoS resistance
- Pin Deno version to 2.4.5 in CI workflow

https://github.com/fedify-dev/fedify/security/advisories/GHSA-rchf-xwx2-hm93



Co-Authored-By: default avatarClaude <noreply@anthropic.com>
parent 60ac4059
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -124,6 +124,13 @@ Released on June 30, 2025.
    typed literal object (e.g., `"votersCount":{"type":"xsd:nonNegativeInteger",
    "@value":123}`).

 -  Fixed a ReDoS (Regular Expression Denial of Service) vulnerability in
    the document loader's HTML parsing.  An attacker-controlled server could
    respond with a malicious HTML payload that blocked the event loop.
    [[CVE-2025-68475]]

[CVE-2025-68475]: https://github.com/fedify-dev/fedify/security/advisories/GHSA-rchf-xwx2-hm93


Version 1.6.2
-------------
+29 −1
Original line number Diff line number Diff line
import { assertEquals, assertRejects, assertThrows } from "@std/assert";
import { assert, assertEquals, assertRejects, assertThrows } from "@std/assert";
import fetchMock from "fetch-mock";
import process from "node:process";
import metadata from "../deno.json" with { type: "json" };
@@ -364,6 +364,34 @@ test("getDocumentLoader()", async (t) => {
    );
  });

  // Regression test for ReDoS vulnerability (CVE-2025-68475)
  // Malicious HTML payload: <a a="b" a="b" ... (unclosed tag)
  // With the vulnerable regex, this causes catastrophic backtracking
  const maliciousPayload = "<a" + ' a="b"'.repeat(30) + " ";

  fetchMock.get("https://example.com/redos", {
    body: maliciousPayload,
    headers: { "Content-Type": "text/html; charset=utf-8" },
  });

  await t.step("ReDoS resistance (CVE-2025-68475)", async () => {
    const start = performance.now();
    // The malicious HTML will fail JSON parsing, but the important thing is
    // that it should complete quickly (not hang due to ReDoS)
    await assertRejects(
      () => fetchDocumentLoader("https://example.com/redos"),
      SyntaxError,
    );
    const elapsed = performance.now() - start;

    // Should complete in under 1 second. With the vulnerable regex,
    // this would take 14+ seconds for 30 repetitions.
    assert(
      elapsed < 1000,
      `Potential ReDoS vulnerability detected: ${elapsed}ms (expected < 1000ms)`,
    );
  });

  fetchMock.hardReset();
});

+46 −28
Original line number Diff line number Diff line
@@ -235,21 +235,38 @@ export async function getRemoteDocument(
      contentType === "application/xhtml+xml" ||
      contentType?.startsWith("application/xhtml+xml;"))
  ) {
    const p =
      /<(a|link)((\s+[a-z][a-z:_-]*=("[^"]*"|'[^']*'|[^\s>]+))+)\s*\/?>/ig;
    const p2 = /\s+([a-z][a-z:_-]*)=("([^"]*)"|'([^']*)'|([^\s>]+))/ig;
    // Security: Limit HTML response size to mitigate ReDoS attacks
    const MAX_HTML_SIZE = 1024 * 1024; // 1MB
    const html = await response.text();
    let m: RegExpExecArray | null;
    const rawAttribs: string[] = [];
    while ((m = p.exec(html)) !== null) rawAttribs.push(m[2]);
    for (const rawAttrs of rawAttribs) {
      let m2: RegExpExecArray | null;
    if (html.length > MAX_HTML_SIZE) {
      logger.warn(
        "HTML response too large, skipping alternate link discovery: {url}",
        { url: documentUrl, size: html.length },
      );
      document = JSON.parse(html);
    } else {
      // Safe regex patterns without nested quantifiers to prevent ReDoS
      // (CVE-2025-68475)
      // Step 1: Extract <a ...> or <link ...> tags
      const tagPattern = /<(a|link)\s+([^>]*?)\s*\/?>/gi;
      // Step 2: Parse attributes
      const attrPattern =
        /([a-z][a-z:_-]*)=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;

      let tagMatch: RegExpExecArray | null;
      while ((tagMatch = tagPattern.exec(html)) !== null) {
        const tagContent = tagMatch[2];
        let attrMatch: RegExpExecArray | null;
        const attribs: Record<string, string> = {};
      while ((m2 = p2.exec(rawAttrs)) !== null) {
        const key = m2[1].toLowerCase();
        const value = m2[3] ?? m2[4] ?? m2[5] ?? "";

        // Reset regex state for attribute parsing
        attrPattern.lastIndex = 0;
        while ((attrMatch = attrPattern.exec(tagContent)) !== null) {
          const key = attrMatch[1].toLowerCase();
          const value = attrMatch[2] ?? attrMatch[3] ?? attrMatch[4] ?? "";
          attribs[key] = value;
        }

        if (
          attribs.rel === "alternate" && "type" in attribs && (
            attribs.type === "application/activity+json" ||
@@ -266,6 +283,7 @@ export async function getRemoteDocument(
        }
      }
      document = JSON.parse(html);
    }
  } else {
    document = await response.json();
  }