// Link-preview fetcher (Convex Node action): validates the URL scheme, fetches
// the page with an 8-second abort timeout under a crawler User-Agent, reads at
// most ~512 KB of an HTML response body, and decodes it to UTF-8 for OG/twitter
// metadata extraction (the parsing step continues beyond this chunk).
// Returns the preview object described by `returns`, or null on any
// validation/fetch failure.
//
// NOTE(review): despite the "prevent loopback SSRF" comment below, only the
// protocol (http/https) is checked — the hostname is never resolved or compared
// against loopback/link-local/private ranges, and redirects are followed.
// Confirm whether SSRF filtering happens elsewhere (e.g. at the network layer).
// NOTE(review): clearTimeout only runs on the success path; if fetch rejects,
// the 8s timer still fires later (harmless — it aborts an already-settled call).
// NOTE(review): the 512 KB cap is checked before each read and the whole chunk
// is appended after, so `total` may overshoot MAX by up to one chunk.
"use node"; import { action } from "./_generated/server"; import { v } from "convex/values"; export const fetchPreview = action({ args: { url: v.string() }, returns: v.union( v.object({ url: v.string(), title: v.optional(v.string()), description: v.optional(v.string()), image: v.optional(v.string()), siteName: v.optional(v.string()), }), v.null(), ), handler: async (_ctx, args) => { try { // Validate URL + prevent loopback SSRF const u = new URL(args.url); if (u.protocol !== "http:" && u.protocol !== "https:") return null; const controller = new AbortController(); const timeout = setTimeout(() => controller.abort(), 8000); const res = await fetch(u.toString(), { method: "GET", headers: { // Discordbot User-Agent — a lot of sites (YouTube included) // only emit og: metadata when they recognise a known crawler, // and the generic Brycord UA gets routed to consent / interstitial // pages that never include the tags we're after. "User-Agent": "Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)", Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.9", }, signal: controller.signal, redirect: "follow", }); clearTimeout(timeout); if (!res.ok) return null; const contentType = res.headers.get("content-type") || ""; if (!contentType.includes("text/html")) return null; // Read up to 512 KB so giant pages don't DOS the action const reader = res.body?.getReader(); if (!reader) return null; const chunks: Uint8Array[] = []; let total = 0; const MAX = 512 * 1024; while (total < MAX) { const { value, done } = await reader.read(); if (done) break; if (value) { chunks.push(value); total += value.length; } } try { await reader.cancel(); } catch {} const merged = new Uint8Array(total); let offset = 0; for (const c of chunks) { merged.set(c, offset); offset += c.length; } const html = new TextDecoder("utf-8").decode(merged); // Parse OG / twitter /