init: Vite+React+Tailwind v2 site with HTML content from WP, RSS feed, external feed aggregator, prerender

2026-05-21 01:11:26 +03:00
commit 76cdeb8b48
42 changed files with 6317 additions and 0 deletions
--- a/scripts/build-rss.js
+++ b/scripts/build-rss.js
@@ -0,0 +1,73 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url)); // directory containing this script
+const ROOT = path.resolve(__dirname, '..'); // parent of the script directory
+const DIST = path.join(ROOT, 'dist'); // feed.xml is written here
+
+const SITE = 'https://pushkinohistory.ru'; // origin prefixed to item links and /uploads/ URLs
+const TITLE = 'История города Пушкино'; // channel <title>
+const DESC = 'Прошлое, настоящее, будущее города Пушкино.'; // channel <description>
+
+const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8')); // array of post records
+
+// Replace the five XML-reserved characters (& < > " ') with their predefined entities.
+const escapeXml = (s) => {
+  const entities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&apos;' };
+  // One pass over the string: every reserved character maps to exactly one entity,
+  // so the result equals escaping '&' first and the other four afterwards.
+  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
+};
+
+const cdata = (s) => `<![CDATA[${String(s).replace(/]]>/g, ']]]]><![CDATA[>')}]]>`; // wrap in CDATA; an embedded "]]>" is split across two sections so it cannot close the section early
+
+// Build the <pubDate> string for a post date: the first space becomes "T", a fixed +03:00
+// offset is appended, and the resulting instant is printed with Date#toUTCString().
+// NOTE(review): presumably dates look like "YYYY-MM-DD HH:MM:SS" in Moscow time — confirm.
+const rfc2822 = (s) => new Date(`${s.replace(' ', 'T')}+03:00`).toUTCString();
+
+const absoluteImages = (html) => // make root-relative upload URLs absolute by prefixing SITE
+  html.replace(/(src|href)="\/uploads\//g, `$1="${SITE}/uploads/`); // only double-quoted src/href values starting with /uploads/ match
+
+const items = posts.map((p) => { // one <item> element per post
+  const html = absoluteImages(p.html); // body HTML with absolute upload URLs
+  const description = p.excerpt // use the excerpt when it is non-empty
+    ? p.excerpt
+    : html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim().slice(0, 500); // else: tag-stripped body, first 500 chars
+  const url = `${SITE}/${p.slug}/`; // emitted as both <link> and permalink <guid>
+  return `    <item>
+      <title>${escapeXml(p.title)}</title>
+      <link>${url}</link>
+      <guid isPermaLink="true">${url}</guid>
+      <pubDate>${rfc2822(p.date)}</pubDate>
+      <dc:creator>${cdata('История города Пушкино')}</dc:creator>
+      ${(p.categories || []).map((c) => `<category>${escapeXml(c)}</category>`).join('\n      ')}
+      <description>${cdata(description)}</description>
+      <content:encoded>${cdata(html)}</content:encoded>
+    </item>`;
+}).join('\n'); // items are joined with newlines
+
+const lastBuild = new Date().toUTCString(); // build time, emitted as <lastBuildDate>
+
+const rss = `<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0"
+     xmlns:content="http://purl.org/rss/1.0/modules/content/"
+     xmlns:dc="http://purl.org/dc/elements/1.1/"
+     xmlns:atom="http://www.w3.org/2005/Atom">
+  <channel>
+    <title>${escapeXml(TITLE)}</title>
+    <link>${SITE}/</link>
+    <atom:link href="${SITE}/feed/" rel="self" type="application/rss+xml" />
+    <description>${escapeXml(DESC)}</description>
+    <language>ru-RU</language>
+    <lastBuildDate>${lastBuild}</lastBuildDate>
+    <ttl>60</ttl>
+${items}
+  </channel>
+</rss>
+`; // complete feed document: channel metadata followed by all items
+
+fs.mkdirSync(DIST, { recursive: true }); // create dist/ if it does not exist yet
+fs.writeFileSync(path.join(DIST, 'feed.xml'), rss); // NOTE(review): the atom:link in the template advertises ${SITE}/feed/ — confirm the server maps that URL to feed.xml
+console.log(`rss: ${posts.length} items → dist/feed.xml`);
--- a/scripts/build-sitemap.js
+++ b/scripts/build-sitemap.js
@@ -0,0 +1,58 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url)); // directory containing this script
+const ROOT = path.resolve(__dirname, '..'); // parent of the script directory
+const DIST = path.join(ROOT, 'dist'); // sitemap.xml and robots.txt are written here
+
+const SITE = 'https://pushkinohistory.ru'; // origin prefixed to every <loc>
+
+const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8'));
+const pages = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/pages.json'), 'utf8'));
+
+const today = new Date().toISOString().slice(0, 10); // current UTC date as YYYY-MM-DD
+
+// Sitemap entries in output order: the home page and /news/ (both stamped with
+// today's date), then one entry per page, then one entry per post.
+
+// Entry for a content item: URL built from its slug, lastmod taken from the
+// first 10 characters of its `date` field.
+const entryFor = (item, priority, changefreq) => ({
+  loc: `${SITE}/${item.slug}/`,
+  lastmod: item.date.slice(0, 10),
+  priority,
+  changefreq,
+});
+
+const urls = [
+  { loc: `${SITE}/`, lastmod: today, priority: '1.0', changefreq: 'weekly' },
+  { loc: `${SITE}/news/`, lastmod: today, priority: '0.8', changefreq: 'hourly' },
+  // Pages are listed before posts.
+  ...pages.map((page) => entryFor(page, '0.7', 'yearly')),
+  // Posts keep the order they have in posts.json.
+  ...posts.map((post) => entryFor(post, '0.6', 'monthly')),
+];
+
+const xml = `<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+${urls.map((u) => `  <url>
+    <loc>${u.loc}</loc>
+    <lastmod>${u.lastmod}</lastmod>
+    <changefreq>${u.changefreq}</changefreq>
+    <priority>${u.priority}</priority>
+  </url>`).join('\n')}
+</urlset>
+`; // one <url> element per entry of `urls`; values are inserted without XML escaping
+
+fs.mkdirSync(DIST, { recursive: true }); // create dist/ if it does not exist yet
+fs.writeFileSync(path.join(DIST, 'sitemap.xml'), xml);
+
+const robots = `User-agent: *
+Allow: /
+
+Sitemap: ${SITE}/sitemap.xml
+`; // allows all crawlers and points them at the sitemap written above
+fs.writeFileSync(path.join(DIST, 'robots.txt'), robots); // this script also owns robots.txt
+
+console.log(`sitemap: ${urls.length} URLs → dist/sitemap.xml`);
--- a/scripts/build-slugs.js
+++ b/scripts/build-slugs.js
@@ -0,0 +1,22 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url)); // directory containing this script
+const ROOT = path.resolve(__dirname, '..'); // parent of the script directory
+const DIST = path.join(ROOT, 'dist'); // routes.json is written here
+
+const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8'));
+const pages = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/pages.json'), 'utf8'));
+
+const cats = new Set(); // distinct category slugs across all posts
+for (const p of posts) (p.categorySlugs || []).forEach((s) => cats.add(s)); // posts without categorySlugs add nothing
+
+const routes = ['/', '/news/']; // fixed routes come first
+for (const p of posts) routes.push(`/${p.slug}/`); // then one route per post
+for (const p of pages) routes.push(`/${p.slug}/`); // then one per page
+for (const c of cats) routes.push(`/cat/${c}/`); // then one per category, in first-seen order
+
+fs.mkdirSync(DIST, { recursive: true }); // create dist/ if it does not exist yet
+fs.writeFileSync(path.join(DIST, 'routes.json'), JSON.stringify(routes, null, 2)); // pretty-printed JSON array of route paths
+console.log(`routes: ${routes.length} → dist/routes.json`);
--- a/scripts/convert_posts.py
+++ b/scripts/convert_posts.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+"""Convert WP posts-raw.jsonl → src/content/{posts,pages}.json with image URL rewrite + translit slugs."""
+import json
+import re
+import sys
+from pathlib import Path
+from urllib.parse import unquote
+
+ROOT = Path(__file__).resolve().parent.parent  # parent of the directory holding this script
+RAW = ROOT / "scripts" / "posts-raw.jsonl"  # input: one JSON object per non-empty line
+OUT_DIR = ROOT / "src" / "content"  # posts.json and pages.json are written here
+OUT_DIR.mkdir(parents=True, exist_ok=True)  # runs at import time, not only from main()
+
+TRANSLIT = {  # lowercase Cyrillic letter -> Latin replacement; 'ъ' and 'ь' map to ''
+    'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo',
+    'ж': 'zh', 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm',
+    'н': 'n', 'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u',
+    'ф': 'f', 'х': 'h', 'ц': 'ts', 'ч': 'ch', 'ш': 'sh', 'щ': 'sch',
+    'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu', 'я': 'ya',
+}
+
+def slugify_ru(s: str) -> str:
+    """Build a URL slug: percent-decode, lowercase, transliterate via TRANSLIT.
+
+    Other alphanumerics, '-' and '_' pass through, spaces and tabs turn into '-',
+    everything else is dropped; returns 'untitled' when nothing is left.
+    """
+    def convert(ch: str) -> str:
+        if ch in TRANSLIT:
+            return TRANSLIT[ch]
+        if ch.isalnum() or ch in '-_':
+            return ch
+        return '-' if ch in ' \t' else ''
+    return re.sub(r'-+', '-', ''.join(map(convert, unquote(s).lower()))).strip('-') or 'untitled'
+
+UPLOAD_RE = re.compile(r'https?://(?:www\.)?pushkinohistory\.ru/wp-content/uploads/[^/]+/[^/]+/([^"\'\s)]+)')
+
+def rewrite_uploads(html: str) -> str:  # point absolute wp-content/uploads URLs at site-relative /uploads/
+    return UPLOAD_RE.sub(r'/uploads/\1', html)  # keeps only what follows the two path segments after uploads/
+
+CATEGORIES = {  # row id -> category slugs (each slug is a key of CATEGORY_NAMES); unlisted ids get []
+    20: [], 23: [], 73: [], 94: [],  # pages — no category
+    137: ['main'], 139: ['main'],
+    142: ['main'], 145: ['main', 'today', 'tech'],
+    158: ['main', 'tech'], 226: ['main'], 235: ['main'],
+}
+
+CATEGORY_NAMES = {  # category slug -> display name stored in each item's 'categories' list
+    'main': 'Главная',
+    'today': 'Настоящее',
+    'tech': 'Техническое',
+}
+
+def main() -> None:
+    """Convert posts-raw.jsonl into src/content/posts.json (rows of type 'post', newest
+    first by 'date') and pages.json (every other row, ordered by id), then print a summary.
+    """
+    posts: list[dict] = []
+    pages: list[dict] = []
+    with RAW.open(encoding='utf-8') as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            row = json.loads(line)
+            old_slug = row['name']
+            # slugify_ru() percent-decodes its input itself, so no '%' special case is needed.
+            new_slug = slugify_ru(old_slug)
+            html = rewrite_uploads(row['content'])
+            item = {
+                'id': row['id'],
+                'slug': new_slug,
+                'oldSlug': old_slug,
+                'title': row['title'],
+                'date': row['date'],
+                'excerpt': row.get('excerpt') or '',
+                'html': html,
+                'categories': [CATEGORY_NAMES[c] for c in CATEGORIES.get(row['id'], [])],
+                'categorySlugs': CATEGORIES.get(row['id'], []),
+            }
+            if row['type'] == 'post':
+                posts.append(item)
+            else:
+                pages.append(item)
+    posts.sort(key=lambda p: p['date'], reverse=True)
+    pages.sort(key=lambda p: p['id'])
+    (OUT_DIR / 'posts.json').write_text(json.dumps(posts, ensure_ascii=False, indent=2), encoding='utf-8')
+    (OUT_DIR / 'pages.json').write_text(json.dumps(pages, ensure_ascii=False, indent=2), encoding='utf-8')
+    print(f'posts: {len(posts)} → src/content/posts.json')
+    print(f'pages: {len(pages)} → src/content/pages.json')
+    for p in posts:
+        print(f"  post: /{p['slug']}/ (was: {p['oldSlug'][:40]}{'...' if len(p['oldSlug']) > 40 else ''}) — {p['title']}")
+    for p in pages:
+        print(f"  page: /{p['slug']}/ — {p['title']}")
+
+if __name__ == '__main__':
+    main()
--- a/scripts/prerender.js
+++ b/scripts/prerender.js
@@ -0,0 +1,53 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import http from 'node:http';
+import { fileURLToPath } from 'node:url';
+import express from 'express';
+import puppeteer from 'puppeteer';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url)); // directory containing this script
+const ROOT = path.resolve(__dirname, '..');
+const DIST = path.join(ROOT, 'dist');
+const routes = JSON.parse(fs.readFileSync(path.join(DIST, 'routes.json'), 'utf8')); // route paths to prerender
+// Keep the pristine SPA shell in memory: prerendering "/" overwrites dist/index.html, and
+// later routes must not be rendered on top of the already-prerendered home page.
+const shell = fs.readFileSync(path.join(DIST, 'index.html'), 'utf8');
+const app = express().use(express.static(DIST, { index: false })); // assets only; never serve an index.html from disk
+app.get('*', (_req, res) => res.type('html').send(shell)); // every page request gets the untouched shell
+const server = http.createServer(app);
+
+async function start() { // serve dist/ locally, load every route in headless Chrome, save the rendered HTML
+  await new Promise((resolve) => server.listen(0, resolve)); // port 0: let the OS pick a free port
+  const port = server.address().port;
+  const baseUrl = `http://127.0.0.1:${port}`;
+
+  const launchOpts = { headless: 'new', args: ['--no-sandbox', '--disable-setuid-sandbox'] };
+  if (process.env.PUPPETEER_EXECUTABLE_PATH) { // optional override of the browser binary
+    launchOpts.executablePath = process.env.PUPPETEER_EXECUTABLE_PATH;
+  }
+  const browser = await puppeteer.launch(launchOpts);
+
+  for (const route of routes) { // routes are rendered one at a time, each in a fresh page
+    const page = await browser.newPage();
+    const url = `${baseUrl}${route}`;
+    try {
+      await page.goto(url, { waitUntil: 'networkidle0', timeout: 15000 });
+    } catch (e) {
+      console.warn(`prerender warn ${route}: ${e.message}`); // best effort: a failed/timed-out navigation is only logged
+    }
+    const html = await page.content(); // captured even after a navigation warning
+    const outDir = path.join(DIST, route); // for "/" this is dist/ itself, so dist/index.html is overwritten
+    fs.mkdirSync(outDir, { recursive: true });
+    fs.writeFileSync(path.join(outDir, 'index.html'), html);
+    console.log(`prerender: ${route}`);
+    await page.close();
+  }
+
+  await browser.close();
+  server.close(); // stop listening so the process can exit
+}
+
+start().catch((e) => { // any uncaught failure aborts the build with exit code 1
+  console.error(e);
+  process.exit(1);
+});
--- a/scripts/pull-external-rss.js
+++ b/scripts/pull-external-rss.js
@@ -0,0 +1,125 @@
+#!/usr/bin/env node
+/**
+ * Тянет внешние RSS-фиды из src/content/feeds.json, дедуплицирует по guid/link,
+ * пишет агрегированный news.json в DATA_DIR (default: ./data).
+ * Запускается по cron на хосте.
+ */
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { XMLParser } from 'fast-xml-parser';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url)); // directory containing this script
+const ROOT = path.resolve(__dirname, '..'); // parent of the script directory
+const FEEDS_FILE = path.join(ROOT, 'src/content/feeds.json'); // feed list read by main()
+const DATA_DIR = process.env.DATA_DIR || path.join(ROOT, 'data'); // output directory; env var overrides ./data
+const OUT_FILE = path.join(DATA_DIR, 'news.json'); // aggregated result
+const TIMEOUT_MS = 15000; // per-feed timeout handed to fetchFeed()
+const HARD_CAP = 200; // maximum number of items written to news.json
+
+const parser = new XMLParser({
+  ignoreAttributes: false, // attributes are needed: Atom links are read from '@_href'
+  attributeNamePrefix: '@_', // attribute keys are read as '@_<name>' in extractItems()
+  textNodeName: '#text', // element text is read from '#text' in extractItems()
+});
+
+/** Download `url` and resolve with the response body text; the request is aborted after `timeoutMs` ms. */
+async function fetchFeed(url, timeoutMs) {
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), timeoutMs);
+  const headers = { 'User-Agent': 'pushkinohistory-ru-v2 RSS aggregator' };
+  try {
+    const response = await fetch(url, { signal: controller.signal, headers });
+    if (response.ok) return await response.text();
+    throw new Error(`HTTP ${response.status}`);
+  } finally {
+    // The timer stays armed while the body is being read and is always cleared afterwards.
+    clearTimeout(timer);
+  }
+}
+
+// Parse feed XML and flatten RSS 2.0 items or Atom entries into
+// { title, link, guid, pubDate, description, source } records; returns [] for any other document.
+function extractItems(xml, feed) {
+  // toISOString() throws RangeError on an Invalid Date; one bad date must not fail the whole feed.
+  const toIso = (v) => (v && !Number.isNaN(new Date(v).getTime()) ? new Date(v).toISOString() : null);
+  const parsed = parser.parse(xml);
+  // RSS 2.0
+  const rssItems = parsed?.rss?.channel?.item;
+  if (rssItems) {
+    return (Array.isArray(rssItems) ? rssItems : [rssItems]).map((it) => ({
+      title: typeof it.title === 'string' ? it.title : it.title?.['#text'] || '',
+      link: typeof it.link === 'string' ? it.link : it.link?.['#text'] || '',
+      guid: typeof it.guid === 'string' ? it.guid : it.guid?.['#text'] || it.link || '',
+      pubDate: toIso(it.pubDate),
+      description: stripHtml(it.description || it['content:encoded'] || ''),
+      source: feed.name,
+    }));
+  }
+  // Atom
+  const atomEntries = parsed?.feed?.entry;
+  if (!atomEntries) return [];
+  return (Array.isArray(atomEntries) ? atomEntries : [atomEntries]).map((e) => {
+    const link = Array.isArray(e.link) ? e.link[0]?.['@_href'] : e.link?.['@_href'] || e.link;
+    return {
+      title: typeof e.title === 'string' ? e.title : e.title?.['#text'] || '',
+      link: link || '',
+      guid: e.id || link || '',
+      pubDate: toIso(e.updated || e.published),
+      description: stripHtml(e.summary?.['#text'] || e.summary || e.content?.['#text'] || ''),
+      source: feed.name,
+    };
+  });
+}
+
+// Reduce an HTML fragment to plain text: tags become spaces, whitespace runs collapse, result capped at 400 chars.
+function stripHtml(s) {
+  return s ? String(s).replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim().slice(0, 400) : '';
+}
+
+async function main() { // fetch all enabled feeds, merge, dedupe, sort newest first, write news.json
+  const feeds = JSON.parse(fs.readFileSync(FEEDS_FILE, 'utf8')).filter((f) => f.enabled);
+  if (feeds.length === 0) {
+    console.log('no enabled feeds — writing empty news.json');
+    fs.mkdirSync(DATA_DIR, { recursive: true });
+    fs.writeFileSync(OUT_FILE, JSON.stringify({ updatedAt: new Date().toISOString(), items: [] }, null, 2));
+    return;
+  }
+
+  const all = [];
+  for (const feed of feeds) { // feeds are fetched sequentially
+    try {
+      const xml = await fetchFeed(feed.url, TIMEOUT_MS);
+      const items = extractItems(xml, feed);
+      const max = feed.max || 20; // per-feed limit; a missing or 0 `max` falls back to 20
+      all.push(...items.slice(0, max));
+      console.log(`✓ ${feed.name}: ${items.length} items (kept ${Math.min(items.length, max)})`);
+    } catch (e) {
+      console.warn(`✗ ${feed.name}: ${e.message}`); // a failing feed is skipped; the others still run
+    }
+  }
+
+  const seen = new Set();
+  const deduped = [];
+  for (const it of all) { // keep the first occurrence of each guid (or link when guid is empty)
+    const key = it.guid || it.link;
+    if (!key || seen.has(key)) continue; // items with neither guid nor link are dropped
+    seen.add(key);
+    deduped.push(it);
+  }
+  deduped.sort((a, b) => (b.pubDate || '').localeCompare(a.pubDate || '')); // newest first; items without a date sort last
+
+  const out = {
+    updatedAt: new Date().toISOString(),
+    items: deduped.slice(0, HARD_CAP),
+  };
+
+  fs.mkdirSync(DATA_DIR, { recursive: true });
+  fs.writeFileSync(OUT_FILE, JSON.stringify(out, null, 2)); // NOTE(review): if every fetch failed this still overwrites news.json with an empty list — confirm that is intended
+  console.log(`→ ${OUT_FILE}: ${out.items.length} items`);
+}
+
+main().catch((e) => { // failures outside the per-feed try/catch end the run with exit code 1
+  console.error(e);
+  process.exit(1);
+});