synops/maskinrommet/src/tiptap.rs
vegard e050612dec HTML-rendering av enkeltartikler til CAS med SEO-metadata (oppgave 14.2)
Implementerer rendering-pipeline: metadata.document (TipTap JSON) → HTML
via Tera-templates → CAS-lagring → metadata.rendered oppdateres.

Nye moduler:
- tiptap.rs: Konverterer TipTap/ProseMirror JSON til HTML. Støtter
  paragraph, heading, blockquote, lister, code_block, image, hr,
  og marks (bold, italic, strike, code, link, underline).
  XSS-sikker med HTML-escaping.

- render_article jobb i jobbkøen: Henter node + samling, konverterer
  document → HTML, rendrer med Tera + tema, lagrer i CAS, oppdaterer
  nodens metadata.rendered med html_hash og renderer_version.

Endringer:
- publishing.rs: SeoData-struct med OG-tags, canonical URL, JSON-LD.
  render_article_to_cas() for full pipeline. serve_article() serverer
  fra CAS (immutable cache) hvis pre-rendret, fallback til on-the-fly.
  RENDERER_VERSION=1 for fremtidig bulk re-rendering.

- intentions.rs: Trigger render_article-jobb automatisk når belongs_to
  edge opprettes til samling med publishing-trait.

- Alle 4 artikkel-templates: SEO-block med meta description, OG-tags
  (type, title, description, url, site_name, image, published_time),
  canonical URL, RSS-link, og JSON-LD structured data.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-18 00:52:58 +00:00

380 lines
12 KiB
Rust

//! TipTap/ProseMirror JSON → HTML-konvertering.
//!
//! Konverterer `metadata.document` (TipTap JSON) til HTML-streng.
//! Støtter vanlige nodetyper: paragraph, heading, blockquote, bullet_list,
//! ordered_list, list_item, code_block, horizontal_rule, image, hard_break.
//! Støtter marks: bold, italic, strike, code, link, underline.
use serde_json::Value;
/// Konverter et TipTap/ProseMirror-dokument (JSON) til HTML.
/// Returnerer tom streng hvis dokumentet er ugyldig.
pub fn document_to_html(doc: &Value) -> String {
let Some(content) = doc.get("content").and_then(|c| c.as_array()) else {
return String::new();
};
let mut html = String::new();
for node in content {
render_node(node, &mut html);
}
html
}
fn render_node(node: &Value, out: &mut String) {
let node_type = node.get("type").and_then(|t| t.as_str()).unwrap_or("");
match node_type {
"paragraph" => {
out.push_str("<p>");
render_inline_content(node, out);
out.push_str("</p>\n");
}
"heading" => {
let level = node
.get("attrs")
.and_then(|a| a.get("level"))
.and_then(|l| l.as_u64())
.unwrap_or(2)
.min(6);
out.push_str(&format!("<h{level}>"));
render_inline_content(node, out);
out.push_str(&format!("</h{level}>\n"));
}
"blockquote" => {
out.push_str("<blockquote>\n");
render_children(node, out);
out.push_str("</blockquote>\n");
}
"bulletList" | "bullet_list" => {
out.push_str("<ul>\n");
render_children(node, out);
out.push_str("</ul>\n");
}
"orderedList" | "ordered_list" => {
let start = node
.get("attrs")
.and_then(|a| a.get("start"))
.and_then(|s| s.as_u64())
.unwrap_or(1);
if start == 1 {
out.push_str("<ol>\n");
} else {
out.push_str(&format!("<ol start=\"{start}\">\n"));
}
render_children(node, out);
out.push_str("</ol>\n");
}
"listItem" | "list_item" => {
out.push_str("<li>");
render_children(node, out);
out.push_str("</li>\n");
}
"codeBlock" | "code_block" => {
let lang = node
.get("attrs")
.and_then(|a| a.get("language"))
.and_then(|l| l.as_str())
.unwrap_or("");
if lang.is_empty() {
out.push_str("<pre><code>");
} else {
out.push_str(&format!("<pre><code class=\"language-{}\">", escape_html(lang)));
}
render_inline_content(node, out);
out.push_str("</code></pre>\n");
}
"horizontalRule" | "horizontal_rule" => {
out.push_str("<hr>\n");
}
"image" => {
let attrs = node.get("attrs");
let src = attrs
.and_then(|a| a.get("src"))
.and_then(|s| s.as_str())
.unwrap_or("");
let alt = attrs
.and_then(|a| a.get("alt"))
.and_then(|s| s.as_str())
.unwrap_or("");
let title = attrs
.and_then(|a| a.get("title"))
.and_then(|s| s.as_str());
out.push_str(&format!(
"<img src=\"{}\" alt=\"{}\"",
escape_attr(src),
escape_attr(alt)
));
if let Some(t) = title {
out.push_str(&format!(" title=\"{}\"", escape_attr(t)));
}
out.push_str(">\n");
}
"hardBreak" | "hard_break" => {
out.push_str("<br>");
}
_ => {
// Ukjent nodetype — render barn rekursivt
render_children(node, out);
}
}
}
fn render_children(node: &Value, out: &mut String) {
if let Some(content) = node.get("content").and_then(|c| c.as_array()) {
for child in content {
render_node(child, out);
}
}
}
fn render_inline_content(node: &Value, out: &mut String) {
let Some(content) = node.get("content").and_then(|c| c.as_array()) else {
return;
};
for child in content {
let child_type = child.get("type").and_then(|t| t.as_str()).unwrap_or("");
match child_type {
"text" => {
let text = child.get("text").and_then(|t| t.as_str()).unwrap_or("");
let marks = child.get("marks").and_then(|m| m.as_array());
render_text_with_marks(text, marks, out);
}
"hardBreak" | "hard_break" => {
out.push_str("<br>");
}
"image" => {
render_node(child, out);
}
_ => {
// Ukjent inline-type — render rekursivt
render_node(child, out);
}
}
}
}
fn render_text_with_marks(text: &str, marks: Option<&Vec<Value>>, out: &mut String) {
let Some(marks) = marks else {
out.push_str(&escape_html(text));
return;
};
// Åpne marks
let mut close_tags: Vec<&str> = Vec::new();
for mark in marks {
let mark_type = mark.get("type").and_then(|t| t.as_str()).unwrap_or("");
match mark_type {
"bold" | "strong" => {
out.push_str("<strong>");
close_tags.push("</strong>");
}
"italic" | "em" => {
out.push_str("<em>");
close_tags.push("</em>");
}
"strike" | "strikethrough" => {
out.push_str("<s>");
close_tags.push("</s>");
}
"code" => {
out.push_str("<code>");
close_tags.push("</code>");
}
"underline" => {
out.push_str("<u>");
close_tags.push("</u>");
}
"link" => {
let href = mark
.get("attrs")
.and_then(|a| a.get("href"))
.and_then(|h| h.as_str())
.unwrap_or("#");
let target = mark
.get("attrs")
.and_then(|a| a.get("target"))
.and_then(|t| t.as_str());
out.push_str(&format!("<a href=\"{}\"", escape_attr(href)));
if let Some(t) = target {
out.push_str(&format!(" target=\"{}\"", escape_attr(t)));
}
out.push_str(&format!(" rel=\"noopener noreferrer\">"));
close_tags.push("</a>");
}
_ => {} // Ukjent mark — ignorer
}
}
out.push_str(&escape_html(text));
// Lukk marks i motsatt rekkefølge
for tag in close_tags.iter().rev() {
out.push_str(tag);
}
}
/// Escape text for use as HTML element content.
///
/// Replaces `&`, `<` and `>` with their entities. Quotes are left untouched, so the
/// result is not suitable for use inside an attribute value.
fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Escape text for use inside a double-quoted HTML attribute value.
///
/// Replaces `&`, `<`, `>` and `"` with their entities. Single quotes are left
/// untouched, so the result must only be placed between double quotes.
fn escape_attr(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            other => escaped.push(other),
        }
    }
    escaped
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Wrap `content` (a JSON array of nodes) in a `doc` node and render it.
    fn render(content: serde_json::Value) -> String {
        document_to_html(&json!({ "type": "doc", "content": content }))
    }

    /// Build a paragraph node holding a single unmarked text run.
    fn paragraph(text: &str) -> serde_json::Value {
        json!({
            "type": "paragraph",
            "content": [{ "type": "text", "text": text }]
        })
    }

    #[test]
    fn simple_paragraph() {
        assert_eq!(render(json!([paragraph("Hello world")])), "<p>Hello world</p>\n");
    }

    #[test]
    fn heading_levels() {
        // Every valid level maps to the matching element.
        for level in 1..=6u64 {
            let html = render(json!([{
                "type": "heading",
                "attrs": { "level": level },
                "content": [{ "type": "text", "text": "Title" }]
            }]));
            assert_eq!(html, format!("<h{level}>Title</h{level}>\n"));
        }

        // Levels above 6 are capped at 6.
        let capped = render(json!([{
            "type": "heading",
            "attrs": { "level": 9 },
            "content": [{ "type": "text", "text": "Title" }]
        }]));
        assert_eq!(capped, "<h6>Title</h6>\n");

        // A heading without a level falls back to 2.
        let fallback = render(json!([{
            "type": "heading",
            "content": [{ "type": "text", "text": "Title" }]
        }]));
        assert_eq!(fallback, "<h2>Title</h2>\n");
    }

    #[test]
    fn bold_and_italic_marks() {
        let bold_only = render(json!([{
            "type": "paragraph",
            "content": [{
                "type": "text",
                "text": "bold text",
                "marks": [{ "type": "bold" }]
            }]
        }]));
        assert_eq!(bold_only, "<p><strong>bold text</strong></p>\n");

        // Marks open in array order and close in reverse, giving valid nesting.
        let both = render(json!([{
            "type": "paragraph",
            "content": [{
                "type": "text",
                "text": "bold text",
                "marks": [{ "type": "bold" }, { "type": "italic" }]
            }]
        }]));
        assert_eq!(both, "<p><strong><em>bold text</em></strong></p>\n");
    }

    #[test]
    fn link_mark() {
        let html = render(json!([{
            "type": "paragraph",
            "content": [{
                "type": "text",
                "text": "click here",
                "marks": [{ "type": "link", "attrs": { "href": "https://example.com" } }]
            }]
        }]));
        assert_eq!(
            html,
            "<p><a href=\"https://example.com\" rel=\"noopener noreferrer\">click here</a></p>\n"
        );
    }

    #[test]
    fn link_mark_with_target() {
        let html = render(json!([{
            "type": "paragraph",
            "content": [{
                "type": "text",
                "text": "click here",
                "marks": [{
                    "type": "link",
                    "attrs": { "href": "https://example.com", "target": "_blank" }
                }]
            }]
        }]));
        assert_eq!(
            html,
            "<p><a href=\"https://example.com\" target=\"_blank\" rel=\"noopener noreferrer\">click here</a></p>\n"
        );
    }

    #[test]
    fn blockquote() {
        let html = render(json!([{
            "type": "blockquote",
            "content": [paragraph("quoted")]
        }]));
        assert_eq!(html, "<blockquote>\n<p>quoted</p>\n</blockquote>\n");
    }

    #[test]
    fn bullet_list() {
        let html = render(json!([{
            "type": "bulletList",
            "content": [
                { "type": "listItem", "content": [paragraph("item 1")] },
                { "type": "listItem", "content": [paragraph("item 2")] }
            ]
        }]));
        assert_eq!(
            html,
            "<ul>\n<li><p>item 1</p>\n</li>\n<li><p>item 2</p>\n</li>\n</ul>\n"
        );
    }

    #[test]
    fn ordered_list_start() {
        // The default start of 1 is not written out.
        let default_start = render(json!([{
            "type": "orderedList",
            "content": [{ "type": "listItem", "content": [paragraph("x")] }]
        }]));
        assert_eq!(default_start, "<ol>\n<li><p>x</p>\n</li>\n</ol>\n");

        let custom_start = render(json!([{
            "type": "ordered_list",
            "attrs": { "start": 3 },
            "content": [{ "type": "list_item", "content": [paragraph("x")] }]
        }]));
        assert_eq!(custom_start, "<ol start=\"3\">\n<li><p>x</p>\n</li>\n</ol>\n");
    }

    #[test]
    fn hard_break() {
        let html = render(json!([{
            "type": "paragraph",
            "content": [
                { "type": "text", "text": "a" },
                { "type": "hardBreak" },
                { "type": "text", "text": "b" }
            ]
        }]));
        assert_eq!(html, "<p>a<br>b</p>\n");
    }

    #[test]
    fn html_escaping() {
        let html = render(json!([paragraph("<script>alert('xss')</script>")]));
        assert!(!html.contains("<script>"));
        assert!(html.contains("&lt;script&gt;"));
    }

    #[test]
    fn attribute_escaping() {
        let html = render(json!([{
            "type": "image",
            "attrs": { "src": "/x", "alt": "a \"b\" <c>" }
        }]));
        assert_eq!(html, "<img src=\"/x\" alt=\"a &quot;b&quot; &lt;c&gt;\">\n");
    }

    #[test]
    fn empty_doc() {
        // Missing `content`.
        assert_eq!(document_to_html(&json!({ "type": "doc" })), "");
        // `content` present but not an array.
        assert_eq!(document_to_html(&json!({ "type": "doc", "content": "nope" })), "");
    }

    #[test]
    fn image_node() {
        let html = render(json!([{
            "type": "image",
            "attrs": { "src": "/cas/abc123", "alt": "Test image" }
        }]));
        assert!(html.contains("src=\"/cas/abc123\""));
        assert!(html.contains("alt=\"Test image\""));
    }

    #[test]
    fn code_block() {
        let with_language = render(json!([{
            "type": "codeBlock",
            "attrs": { "language": "rust" },
            "content": [{ "type": "text", "text": "fn main() {}" }]
        }]));
        assert_eq!(
            with_language,
            "<pre><code class=\"language-rust\">fn main() {}</code></pre>\n"
        );

        let without_language = render(json!([{
            "type": "code_block",
            "content": [{ "type": "text", "text": "fn main() {}" }]
        }]));
        assert_eq!(without_language, "<pre><code>fn main() {}</code></pre>\n");
    }
}