Files
anotherreflections-website-v2/scripts/migrate-wp.mjs
striker 0c3e248ccc feat: каркас v2 — Astro 5 + миграция 50 постов из WP + RSS для IPB Importer
- Astro 5 (6.3.6) minimal scaffold с Content Collections для posts/pages
- Тёмная палитра в духе старой темы darkness-10 (тонкая типографика Inter+Lora)
- Layout с шапкой/футером, главной-лентой, страницами категорий и одиночными постами
- RSS (общий + per-category) под IPB RSS Importer: RFC-822 pubDate,
  <guid isPermaLink="true">, <content:encoded> с CDATA, <lastBuildDate>
- RSS_CUTOFF фильтр: архив 2009-2015 на сайте остаётся, в RSS — только новые
- 50 постов и 6 страниц мигрированы из WP (anotherreflctions_ru @ db.hhivp.com)
  через scripts/migrate-wp.mjs (HTML→md без внешних зависимостей)
- sitemap.xml автоматически через @astrojs/sitemap
2026-05-21 00:58:44 +03:00

145 lines
5.0 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// ROOT is the parent of the directory that contains this script; every other
// path used by the migration is resolved relative to it.
const ROOT = path.resolve(__dirname, '..');
const EXPORT_PATH = path.join(ROOT, '_wp-export.json');
const POSTS_DIR = path.join(ROOT, 'src/content/posts');
const PAGES_DIR = path.join(ROOT, 'src/content/pages');

// Load the export first: if it is missing or malformed the script stops here,
// before any output directory has been created.
const exportJson = fs.readFileSync(EXPORT_PATH, 'utf8');
const data = JSON.parse(exportJson);

// Make sure both output directories exist (no-op when they already do).
for (const outputDir of [POSTS_DIR, PAGES_DIR]) {
  fs.mkdirSync(outputDir, { recursive: true });
}
/**
 * Decodes the fixed set of HTML entities handled by this migration.
 *
 * Only the entities listed below are recognised; any other entity (named or
 * numeric) is left in the text unchanged.
 *
 * @param {string} s - Text that may contain HTML entities.
 * @returns {string} The text with the known entities replaced by characters.
 */
const decodeEntities = (s) => s
  .replace(/&nbsp;/g, ' ')
  .replace(/&laquo;/g, '«')
  .replace(/&raquo;/g, '»')
  .replace(/&mdash;/g, '—')
  // Written as an escape on purpose: a literal en dash is easy to lose or to
  // confuse with a hyphen, and an empty replacement silently deletes the dash.
  .replace(/&ndash;/g, '\u2013')
  .replace(/&hellip;/g, '…')
  .replace(/&quot;/g, '"')
  .replace(/&#039;/g, "'")
  .replace(/&#x27;/g, "'")
  .replace(/&lt;/g, '<')
  .replace(/&gt;/g, '>')
  // &amp; must stay last so that "&amp;lt;" decodes once to "&lt;", not to "<".
  .replace(/&amp;/g, '&');
/**
 * Converts a fragment of post HTML into Markdown using regex rewrites only
 * (no HTML parser, no external dependencies).
 *
 * Handled constructs: images, links, bold, italic, blockquotes, list items
 * (every item becomes a "- " bullet, including items of ordered lists),
 * paragraphs and line breaks. Every other tag is removed and only its text
 * content is kept. Entities are decoded last via decodeEntities().
 *
 * @param {string|null|undefined} html - HTML fragment; falsy input yields ''.
 * @returns {string} Trimmed Markdown text.
 */
const htmlToMd = (html) => {
  if (!html) return '';
  let s = html;

  // Images. The tag is matched as a whole and src/alt are then looked up
  // independently, so alt is found regardless of attribute order or of what
  // sits between the two attributes. The lookbehind keeps attributes such as
  // data-src / data-alt from being mistaken for src / alt.
  s = s.replace(/<img\b[^>]*>/gi, (tag) => {
    const srcMatch = /(?<![\w-])src\s*=\s*(["'])(.+?)\1/is.exec(tag);
    // No usable src: leave the tag alone; the generic tag strip below drops it.
    if (!srcMatch) return tag;
    const altMatch = /(?<![\w-])alt\s*=\s*(["'])(.*?)\1/is.exec(tag);
    return `![${altMatch ? altMatch[2] : ''}](${srcMatch[2]})`;
  });

  // Links: nested markup inside the link text is flattened to plain text.
  s = s.replace(/<a\s+[^>]*?href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi,
    (_, href, text) => `[${text.replace(/<[^>]+>/g, '').trim()}](${href})`);

  // Bold and italic.
  s = s.replace(/<(strong|b)\b[^>]*>([\s\S]*?)<\/\1>/gi, '**$2**');
  s = s.replace(/<(em|i)\b[^>]*>([\s\S]*?)<\/\1>/gi, '*$2*');

  // Blockquotes: prefix every non-empty line of the quote with "> ".
  s = s.replace(/<blockquote[^>]*>([\s\S]*?)<\/blockquote>/gi,
    (_, inner) => inner.trim().split(/\n+/).map((line) => `> ${line.trim()}`).join('\n'));

  // Lists: items become bullets, the <ul>/<ol> wrappers become line breaks.
  s = s.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, '- $1\n');
  s = s.replace(/<\/?(ul|ol)[^>]*>/gi, '\n');

  // Paragraph boundaries become blank lines, <br> a single newline.
  s = s.replace(/<p[^>]*>/gi, '\n\n');
  s = s.replace(/<\/p>/gi, '\n\n');
  s = s.replace(/<br\s*\/?>/gi, '\n');

  // Drop every remaining tag (span, div, font, headings, ...), keeping its text.
  s = s.replace(/<[^>]+>/g, '');

  // Decode entities only after all tags are gone, so a decoded "<" or ">"
  // is never mistaken for markup by the rewrites above.
  s = decodeEntities(s);

  // Collapse runs of 3+ newlines into a single blank line.
  s = s.replace(/\n{3,}/g, '\n\n');
  return s.trim();
};
/**
 * Serialises a scalar for use as a YAML frontmatter value.
 *
 * The value is always emitted as a double-quoted string: it is converted with
 * String() and then JSON-encoded, so numbers and booleans come out quoted too
 * (42 becomes "42"). null and undefined become an empty quoted string.
 *
 * The previous version tested the string against a "safe characters" pattern
 * but returned the same JSON-encoded result on both branches, so that dead
 * check has been removed without changing the output.
 *
 * @param {*} s - Value to serialise.
 * @returns {string} A double-quoted scalar.
 */
const yamlEscape = (s) => (s == null ? '""' : JSON.stringify(String(s)));
/**
 * Renders an object as a frontmatter block delimited by "---" lines.
 *
 * - null / undefined values and empty arrays are omitted entirely;
 * - arrays become a key line followed by one " - item" line per element;
 * - other objects become a key line followed by one " key: value" line per
 *   own enumerable property (one level deep only);
 * - everything else is written as "key: value".
 * All scalar values are serialised with yamlEscape().
 *
 * @param {Object} obj - Frontmatter fields, emitted in insertion order.
 * @returns {string} The frontmatter block, without a trailing newline.
 */
const fmtFrontmatter = (obj) => {
  // Returns the output lines for one field (possibly none).
  const renderField = ([key, value]) => {
    if (value === undefined || value === null) return [];

    if (Array.isArray(value)) {
      if (value.length === 0) return [];
      return [`${key}:`, ...value.map((item) => ` - ${yamlEscape(item)}`)];
    }

    if (typeof value === 'object') {
      const nested = Object.entries(value)
        .map(([subKey, subValue]) => ` ${subKey}: ${yamlEscape(subValue)}`);
      return [`${key}:`, ...nested];
    }

    return [`${key}: ${yamlEscape(value)}`];
  };

  const fieldLines = Object.entries(obj).flatMap((entry) => renderField(entry));
  return ['---', ...fieldLines, '---'].join('\n');
};
// Lookup table: author id -> display name, falling back to the login when the
// display name is empty.
const authorsMap = Object.fromEntries(
  data.authors.map(({ id, display_name, login }) => [id, display_name || login]),
);

// Posts: write one Markdown file (frontmatter + converted body) per post.
let postCount = 0;
for (const post of data.posts) {
  const { id, slug, date, modified } = post;

  // Convert the body once; it doubles as the description source when the
  // post has no excerpt.
  const body = htmlToMd(post.content_html);
  const summarySource = htmlToMd(post.excerpt) || body;

  // Only record an update date when one exists and differs from the
  // publication date.
  const wasEdited = Boolean(modified) && modified !== date;

  // Dates arrive as "YYYY-MM-DD HH:MM:SS"; turn the space into "T" and append
  // a fixed +03:00 offset.
  const frontmatter = fmtFrontmatter({
    title: post.title,
    pubDate: `${date.replace(' ', 'T')}+03:00`,
    updatedDate: wasEdited ? `${modified.replace(' ', 'T')}+03:00` : undefined,
    slug,
    legacyId: id,
    author: authorsMap[post.author_id] || 'admin',
    categories: post.categories.map((category) => category.name),
    categorySlugs: post.categories.map((category) => category.slug),
    tags: post.tags.map((tag) => tag.name),
    // First 200 characters of the source text, with whitespace runs collapsed.
    description: summarySource.slice(0, 200).replace(/\s+/g, ' ').trim(),
  });

  // File name: the slug with anything outside latin letters, digits, а-я and
  // "-" replaced by "-", capped at 80 characters; fall back to the post id
  // when nothing is left.
  const sanitizedSlug = slug.replace(/[^a-z0-9а-я\-]/gi, '-').slice(0, 80);
  const fileName = `${sanitizedSlug || `post-${id}`}.md`;

  fs.writeFileSync(path.join(POSTS_DIR, fileName), `${frontmatter}\n\n${body}\n`, 'utf8');
  postCount += 1;
}
// Pages: write one Markdown file (frontmatter + converted body) per page.
let pageCount = 0;
for (const pg of data.pages) {
  // Guard against a missing/empty modification date, exactly as the posts
  // loop does; without it `pg.modified.replace` throws a TypeError for any
  // page whose export record has no `modified` value.
  const wasEdited = Boolean(pg.modified) && pg.modified !== pg.date;

  // Dates arrive as "YYYY-MM-DD HH:MM:SS"; turn the space into "T" and append
  // a fixed +03:00 offset.
  const fm = {
    title: pg.title,
    slug: pg.slug,
    legacyId: pg.id,
    menuOrder: pg.menu_order,
    pubDate: `${pg.date.replace(' ', 'T')}+03:00`,
    updatedDate: wasEdited ? `${pg.modified.replace(' ', 'T')}+03:00` : undefined,
  };

  const body = htmlToMd(pg.content_html);
  const out = `${fmtFrontmatter(fm)}\n\n${body}\n`;

  // File name: the slug with anything outside latin letters, digits, а-я and
  // "-" replaced by "-", capped at 80 characters; fall back to the page id
  // when nothing is left.
  const safeName = pg.slug.replace(/[^a-z0-9а-я\-]/gi, '-').slice(0, 80) || `page-${pg.id}`;
  fs.writeFileSync(path.join(PAGES_DIR, `${safeName}.md`), out, 'utf8');
  pageCount++;
}
// Categories dump (used to build the /category/<slug>/ pages and feeds).
// Only the four fields below are kept from each exported category.
const catsOut = data.categories.map(({ name, slug, description, count }) => ({
  name,
  slug,
  description,
  count,
}));

const categoriesFile = path.join(ROOT, 'src/content/_categories.json');
fs.writeFileSync(categoriesFile, JSON.stringify(catsOut, null, 2), 'utf8');

// Summary of what was written.
[
  `Migrated ${postCount} posts → ${POSTS_DIR}`,
  `Migrated ${pageCount} pages → ${PAGES_DIR}`,
  `Categories: ${catsOut.length} → src/content/_categories.json`,
].forEach((line) => console.log(line));