- Astro 5 (6.3.6) minimal scaffold с Content Collections для posts/pages - Тёмная палитра в духе старой темы darkness-10 (тонкая типографика Inter+Lora) - Layout с шапкой/футером, главной-лентой, страницами категорий и одиночными постами - RSS (общий + per-category) под IPB RSS Importer: RFC-822 pubDate, <guid isPermaLink="true">, <content:encoded> с CDATA, <lastBuildDate> - RSS_CUTOFF фильтр: архив 2009-2015 на сайте остаётся, в RSS — только новые - 50 постов и 6 страниц мигрированы из WP (anotherreflctions_ru @ db.hhivp.com) через scripts/migrate-wp.mjs (HTML→md без внешних зависимостей) - sitemap.xml автоматически через @astrojs/sitemap
145 lines
5.0 KiB
JavaScript
145 lines
5.0 KiB
JavaScript
#!/usr/bin/env node
|
||
import fs from 'node:fs';
|
||
import path from 'node:path';
|
||
import { fileURLToPath } from 'node:url';
|
||
|
||
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Project root is the parent of the directory that holds this script.
const ROOT = path.resolve(__dirname, '..');
const EXPORT_PATH = path.join(ROOT, '_wp-export.json');
const POSTS_DIR = path.join(ROOT, 'src/content/posts');
const PAGES_DIR = path.join(ROOT, 'src/content/pages');

/**
 * Read and parse the export JSON file.
 *
 * @param {string} file - Absolute path of the JSON export.
 * @returns {object} The parsed export.
 * @throws {Error} When the file cannot be read or is not valid JSON; the
 *   underlying error is preserved as `cause`.
 */
function loadExport(file) {
  try {
    return JSON.parse(fs.readFileSync(file, 'utf8'));
  } catch (err) {
    // Name the offending file so a missing/corrupt export is obvious.
    throw new Error(`Cannot load export from ${file}: ${err.message}`, { cause: err });
  }
}

const data = loadExport(EXPORT_PATH);

// Make sure both output directories exist before anything is written.
fs.mkdirSync(POSTS_DIR, { recursive: true });
fs.mkdirSync(PAGES_DIR, { recursive: true });
|
||
|
||
/**
 * Decode HTML character references in a string.
 *
 * Handles a fixed set of named entities plus decimal (`&#8212;`) and
 * hexadecimal (`&#x2014;`) numeric references. Decoding is done in a single
 * pass, so already-escaped text such as `&amp;lt;` becomes `&lt;` and is not
 * decoded a second time. Unknown or invalid references are left untouched.
 *
 * @param {string} s - Text that may contain character references.
 * @returns {string} The text with recognised references replaced.
 */
const decodeEntities = (() => {
  const namedEntities = new Map([
    ['nbsp', ' '],
    ['laquo', '«'],
    ['raquo', '»'],
    ['mdash', '—'],
    ['ndash', '–'],
    ['hellip', '…'],
    ['quot', '"'],
    ['apos', "'"],
    ['lt', '<'],
    ['gt', '>'],
    ['amp', '&'],
  ]);
  const referencePattern = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g;

  const decodeNumeric = (ref, fallback) => {
    const isHex = ref[1] === 'x' || ref[1] === 'X';
    const codePoint = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (codePoint === 0 || codePoint > 0x10ffff || isSurrogate) return fallback;
    // Keep numeric no-break spaces consistent with the `&nbsp;` mapping.
    if (codePoint === 0xa0) return ' ';
    return String.fromCodePoint(codePoint);
  };

  return (s) => s.replace(referencePattern, (match, ref) => (
    ref[0] === '#' ? decodeNumeric(ref, match) : namedEntities.get(ref) ?? match
  ));
})();
|
||
|
||
/**
 * Convert an HTML fragment to Markdown using regular expressions only
 * (no external HTML parser).
 *
 * Supported constructs: images, links, bold, italic, headings, blockquotes,
 * list items, paragraphs and line breaks. Every other tag is dropped while
 * its text content is kept. Character references are decoded last via
 * `decodeEntities`, and runs of three or more newlines are collapsed.
 *
 * @param {string|null|undefined} html - HTML fragment.
 * @returns {string} Trimmed Markdown text; `''` for empty/missing input.
 */
const htmlToMd = (html) => {
  if (!html) return '';

  // Read one quoted attribute from a raw tag, regardless of attribute order.
  // The leading whitespace requirement keeps `data-src` from matching `src`.
  const readAttr = (tag, name) => {
    const found = new RegExp(`\\s${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)')`, 'i').exec(tag);
    return found ? (found[1] ?? found[2]) : undefined;
  };

  let s = html;

  // images: keep src and (optional) alt; an <img> without src is dropped
  s = s.replace(/<img\b[^>]*>/gi, (tag) => {
    const src = readAttr(tag, 'src');
    if (!src) return '';
    return `![${readAttr(tag, 'alt') ?? ''}](${src})`;
  });

  // links: inner markup is stripped so the link text is plain
  s = s.replace(/<a\s+[^>]*?href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi,
    (_, href, text) => `[${text.replace(/<[^>]+>/g, '').trim()}](${href})`);

  // bold
  s = s.replace(/<(strong|b)\b[^>]*>([\s\S]*?)<\/\1>/gi, '**$2**');

  // italic
  s = s.replace(/<(em|i)\b[^>]*>([\s\S]*?)<\/\1>/gi, '*$2*');

  // headings: <h1>..<h6> become ATX headings on their own line
  s = s.replace(/<h([1-6])\b[^>]*>([\s\S]*?)<\/h\1>/gi,
    (_, level, inner) => `\n\n${'#'.repeat(Number(level))} ${inner.replace(/\s+/g, ' ').trim()}\n\n`);

  // blockquote: prefix every non-empty line with "> "
  s = s.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi,
    (_, inner) => inner.trim().split(/\n+/).map((line) => `> ${line.trim()}`).join('\n'));

  // lists
  s = s.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
  s = s.replace(/<\/?(ul|ol)[^>]*>/gi, '\n');

  // paragraphs
  s = s.replace(/<p[^>]*>/gi, '\n\n');
  s = s.replace(/<\/p>/gi, '\n\n');

  // br
  s = s.replace(/<br\s*\/?>/gi, '\n');

  // strip remaining span/div etc.
  s = s.replace(/<\/?(?:span|div|font|center)[^>]*>/gi, '');

  // any remaining tags — drop
  s = s.replace(/<[^>]+>/g, '');

  // entities (after tag stripping, so decoded "<" / ">" are not treated as tags)
  s = decodeEntities(s);

  // collapse 3+ blank lines
  s = s.replace(/\n{3,}/g, '\n\n');

  return s.trim();
};
|
||
|
||
/**
 * Render a scalar as a double-quoted YAML value.
 *
 * The value is stringified and emitted as a JSON string literal, which is
 * also a valid YAML double-quoted scalar, so no character needs special
 * treatment. Note that numbers are therefore emitted quoted (`42` -> `"42"`).
 *
 * @param {*} s - Value to emit; `null`/`undefined` yield an empty string.
 * @returns {string} The quoted, escaped scalar.
 */
const yamlEscape = (s) => {
  if (s == null) return '""';
  return JSON.stringify(String(s));
};
|
||
|
||
/**
 * Serialise a flat object as a YAML front-matter block delimited by `---`.
 *
 * - `null` / `undefined` values are omitted.
 * - Arrays become block sequences; empty arrays are omitted.
 * - Plain objects become one-level nested mappings; empty objects are
 *   omitted (a bare `key:` would otherwise parse as YAML null).
 * - Everything else is written as `key: <yamlEscape(value)>`.
 *
 * Keys are written verbatim, so they must already be valid YAML keys.
 *
 * @param {Object<string, *>} obj - Front-matter fields in output order.
 * @returns {string} The front-matter block, without a trailing newline.
 */
const fmtFrontmatter = (obj) => {
  const lines = ['---'];
  for (const [key, value] of Object.entries(obj)) {
    if (value == null) continue;

    if (Array.isArray(value)) {
      if (value.length === 0) continue;
      lines.push(`${key}:`);
      for (const item of value) {
        lines.push(` - ${yamlEscape(item)}`);
      }
      continue;
    }

    if (typeof value === 'object') {
      const nested = Object.entries(value);
      if (nested.length === 0) continue;
      lines.push(`${key}:`);
      for (const [nestedKey, nestedValue] of nested) {
        lines.push(` ${nestedKey}: ${yamlEscape(nestedValue)}`);
      }
      continue;
    }

    lines.push(`${key}: ${yamlEscape(value)}`);
  }
  lines.push('---');
  return lines.join('\n');
};
|
||
|
||
// Author id -> display name (falling back to the login) for post front matter.
const authorsMap = Object.fromEntries(
  data.authors.map((a) => [a.id, a.display_name || a.login]),
);

// Posts: write one Markdown file with YAML front matter per exported post.
let postCount = 0;
// File names already written in this run, to avoid silently overwriting a
// post whose sanitised/truncated slug collides with an earlier one.
const usedPostNames = new Set();
for (const p of data.posts) {
  // Convert the content once; it is both the body and the description fallback.
  const body = htmlToMd(p.content_html);
  const summary = htmlToMd(p.excerpt) || body;

  const fm = {
    title: p.title,
    // NOTE(review): the UTC offset is hard-coded to +03:00 — presumably the
    // exported timestamps are local site time; confirm against the export.
    pubDate: `${p.date.replace(' ', 'T')}+03:00`,
    updatedDate: p.modified && p.modified !== p.date
      ? `${p.modified.replace(' ', 'T')}+03:00`
      : undefined,
    slug: p.slug,
    legacyId: p.id,
    author: authorsMap[p.author_id] || 'admin',
    categories: p.categories.map((c) => c.name),
    categorySlugs: p.categories.map((c) => c.slug),
    tags: p.tags.map((t) => t.name),
    description: summary.slice(0, 200).replace(/\s+/g, ' ').trim(),
  };

  const out = `${fmtFrontmatter(fm)}\n\n${body}\n`;

  // Keep only Latin/Cyrillic letters, digits and dashes; cap the length.
  let safeName = p.slug.replace(/[^a-z0-9а-я\-]/gi, '-').slice(0, 80) || `post-${p.id}`;
  if (usedPostNames.has(safeName)) {
    safeName = `${safeName}-${p.id}`;
  }
  usedPostNames.add(safeName);

  fs.writeFileSync(path.join(POSTS_DIR, `${safeName}.md`), out, 'utf8');
  postCount++;
}
|
||
|
||
// Pages: write one Markdown file with YAML front matter per exported page.
let pageCount = 0;
// File names already written in this run, to avoid silently overwriting a
// page whose sanitised/truncated slug collides with an earlier one.
const usedPageNames = new Set();
for (const pg of data.pages) {
  const fm = {
    title: pg.title,
    slug: pg.slug,
    legacyId: pg.id,
    menuOrder: pg.menu_order,
    // NOTE(review): the UTC offset is hard-coded to +03:00 — presumably the
    // exported timestamps are local site time; confirm against the export.
    pubDate: `${pg.date.replace(' ', 'T')}+03:00`,
    // Guard against a missing `modified`, as the posts loop does; without it
    // `pg.modified.replace` throws for pages that were never edited.
    updatedDate: pg.modified && pg.modified !== pg.date
      ? `${pg.modified.replace(' ', 'T')}+03:00`
      : undefined,
  };

  const body = htmlToMd(pg.content_html);
  const out = `${fmtFrontmatter(fm)}\n\n${body}\n`;

  // Keep only Latin/Cyrillic letters, digits and dashes; cap the length.
  let safeName = pg.slug.replace(/[^a-z0-9а-я\-]/gi, '-').slice(0, 80) || `page-${pg.id}`;
  if (usedPageNames.has(safeName)) {
    safeName = `${safeName}-${pg.id}`;
  }
  usedPageNames.add(safeName);

  fs.writeFileSync(path.join(PAGES_DIR, `${safeName}.md`), out, 'utf8');
  pageCount++;
}
|
||
|
||
// Categories dump (used to build the /category/<slug>/ pages and feeds)
const catsOut = data.categories.map(({ name, slug, description, count }) => ({
  name,
  slug,
  description,
  count,
}));
const categoriesPath = path.join(ROOT, 'src/content/_categories.json');
fs.writeFileSync(categoriesPath, JSON.stringify(catsOut, null, 2), 'utf8');

// Summary of what was written.
console.log(`Migrated ${postCount} posts → ${POSTS_DIR}`);
console.log(`Migrated ${pageCount} pages → ${PAGES_DIR}`);
console.log(`Categories: ${catsOut.length} → src/content/_categories.json`);
|