rewrite: Vite+React → Astro 5 + Content Collections

- Бэкап старой версии на ветке vite-react-backup
- Stack: Astro 5 + nginx:alpine runtime, образ ~50 МБ (был ~600 МБ)
- @astrojs/rss заменён ручным buildRss() — гарантия CDATA в content:encoded для IPB Importer
- @astrojs/sitemap → sitemap-index.xml + sitemap.txt
- 152-ФЗ cookie consent + privacy.astro + Analytics с gating
- AI-файлы: robots.txt с явным allow для AI-краулеров, ai.txt, llms.txt
- Гибридный визуал: фото-фон шапки (аэрофото Пушкино) + PT Serif + IBM Plex Sans
- Иерархия: hero "Главная история" с рамкой + "Ещё из истории" + "Хроника"
- Категория "main" (псевдо) скрыта из плашек и из Рубрик в сайдбаре
- hideFromList: true для технических постов
- featuredImage в frontmatter для постов без хорошей первой <img>
- WP resized-URL (-WxH.ext) автоматически → оригинал
- CI/CD: .gitea/workflows/deploy.yml (push → SSH-build)
- Внешние RSS: scripts/pull-external-rss.mjs пишет news.json в bind-mount, фронт фетчит client-side
2026-05-21 03:21:31 +03:00
parent a0219ee8f3
commit c65e07cd98
75 changed files with 5926 additions and 4142 deletions
--- a/scripts/convert_posts.py
+++ b/scripts/convert_posts.py
@@ -34,9 +34,13 @@ def slugify_ru(s: str) -> str:
    return res or 'untitled'

 UPLOAD_RE = re.compile(r'https?://(?:www\.)?pushkinohistory\.ru/wp-content/uploads/[^/]+/[^/]+/([^"\'\s)]+)')
+# WP-resized variants: file-1024x768.png → file.png. Only the original is stored in /uploads/.
+# Group 1 is the /uploads/ path up to the "-<W>x<H>" size suffix, group 2 is the file extension.
+RESIZED_RE = re.compile(r'(/uploads/[^"\'\s)]+?)-\d+x\d+(\.\w+)')

 def rewrite_uploads(html: str) -> str:
+    """Rewrite WordPress upload URLs in *html* to local /uploads/ paths.
+
+    Absolute pushkinohistory.ru wp-content/uploads URLs are replaced by
+    /uploads/<file name>, then any "-<W>x<H>" size suffix in front of the
+    extension is dropped so the link points at the original file.
+    """
-    return UPLOAD_RE.sub(r'/uploads/\1', html)
+    html = UPLOAD_RE.sub(r'/uploads/\1', html)
+    html = RESIZED_RE.sub(r'\1\2', html)
+    return html

 CATEGORIES = {
    20: [], 23: [], 73: [], 94: [],  # pages — no category
--- a/scripts/convert_to_markdown.py
+++ b/scripts/convert_to_markdown.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+"""Convert src/content/{posts,pages}.json → src/content/{posts,pages}/<slug>.md
+for Astro Content Collections."""
+import json
+import sys
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parent.parent
+CONTENT = ROOT / "src" / "content"
+POSTS_JSON = CONTENT / "posts.json"
+PAGES_JSON = CONTENT / "pages.json"
+POSTS_DIR = CONTENT / "posts"
+PAGES_DIR = CONTENT / "pages"
+
+POSTS_DIR.mkdir(parents=True, exist_ok=True)
+PAGES_DIR.mkdir(parents=True, exist_ok=True)
+
+# Manual per-slug flags for the main feed.
+# featured=True — pinned to the top as the hero (a separate framed card).
+# hideFromList=True — not shown in the general feed (visible only in its category/archive).
+# featuredImage — overrides the first <img> from the body for the thumbnail/hero.
+SLUG_FLAGS: dict[str, dict] = {
+    'voronino': {'featured': True, 'featuredImage': '/uploads/IMG_2156.jpg'},
+    'staroe-staroe-selo': {'featured': True, 'featuredImage': '/uploads/IMG_2754.jpg'},
+    'vnimanie-texnicheskie-raboty': {'hideFromList': True},
+    'vnimanie-texnicheskie-raboty-2': {'hideFromList': True},
+    'c-nastupayushhim-novym-2014-godom': {'hideFromList': True},
+}
+
+def yaml_escape(s: str) -> str:
+    return s.replace('"', '\\"')
+
+def make_md(item: dict, kind: str) -> str:
+    fm = [
+        '---',
+        f'title: "{yaml_escape(item["title"])}"',
+        f'slug: {item["slug"]}',
+        f'legacyId: {item["id"]}',
+    ]
+    if item.get('date'):
+        date = item['date'].replace(' ', 'T') + '+03:00'
+        if kind == 'post':
+            fm.append(f'pubDate: {date}')
+        else:
+            fm.append(f'pubDate: {date}')
+    if item.get('excerpt'):
+        fm.append(f'description: "{yaml_escape(item["excerpt"])}"')
+    else:
+        fm.append('description: ""')
+    if kind == 'post':
+        if item.get('categories'):
+            fm.append('categories:')
+            for c in item['categories']:
+                fm.append(f'  - "{yaml_escape(c)}"')
+        else:
+            fm.append('categories: []')
+        if item.get('categorySlugs'):
+            fm.append('categorySlugs:')
+            for s in item['categorySlugs']:
+                fm.append(f'  - "{s}"')
+        else:
+            fm.append('categorySlugs: []')
+        fm.append('author: "История города Пушкино"')
+    if item.get('oldSlug') and item['oldSlug'] != item['slug']:
+        fm.append(f'oldSlug: "{item["oldSlug"]}"')
+    if kind == 'post':
+        flags = SLUG_FLAGS.get(item['slug'], {})
+        if flags.get('featured'):
+            fm.append('featured: true')
+        if flags.get('hideFromList'):
+            fm.append('hideFromList: true')
+        if flags.get('featuredImage'):
+            fm.append(f'featuredImage: "{flags["featuredImage"]}"')
+    fm.append('---')
+    fm.append('')
+    fm.append(item['html'])
+    return '\n'.join(fm) + '\n'
+
+def convert(json_path: Path, out_dir: Path, kind: str) -> int:
+    items = json.loads(json_path.read_text(encoding='utf-8'))
+    for item in items:
+        md = make_md(item, kind)
+        (out_dir / f'{item["slug"]}.md').write_text(md, encoding='utf-8')
+    return len(items)
+
+if __name__ == '__main__':
+    n_posts = convert(POSTS_JSON, POSTS_DIR, 'post')
+    n_pages = convert(PAGES_JSON, PAGES_DIR, 'page')
+    print(f'posts: {n_posts} → src/content/posts/')
+    print(f'pages: {n_pages} → src/content/pages/')
--- a/scripts/pull-external-rss.mjs
+++ b/scripts/pull-external-rss.mjs
@@ -0,0 +1,124 @@
+#!/usr/bin/env node
+/**
+ * Тянет внешние RSS-фиды из src/data/feeds.json и записывает агрегированный
+ * news.json в DATA_DIR (по умолчанию ./data). Запускается по cron на хосте.
+ *
+ * Использование:
+ *   node scripts/pull-external-rss.mjs           # пишет в ./data/news.json
+ *   DATA_DIR=/abs/path node scripts/pull-external-rss.mjs
+ */
+import fs from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { XMLParser } from 'fast-xml-parser';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const ROOT = path.resolve(__dirname, '..');
+const FEEDS_FILE = path.join(ROOT, 'src', 'data', 'feeds.json');
+const DATA_DIR = process.env.DATA_DIR || path.join(ROOT, 'data');
+const OUT_FILE = path.join(DATA_DIR, 'news.json');
+const TIMEOUT_MS = 15000;
+const HARD_CAP = 200;
+
+const parser = new XMLParser({
+  ignoreAttributes: false,
+  attributeNamePrefix: '@_',
+  textNodeName: '#text',
+});
+
+async function fetchFeed(url, timeoutMs) {
+  const ctl = new AbortController();
+  const t = setTimeout(() => ctl.abort(), timeoutMs);
+  try {
+    const r = await fetch(url, {
+      signal: ctl.signal,
+      headers: { 'User-Agent': 'pushkinohistory-ru-v2 RSS aggregator' },
+    });
+    if (!r.ok) throw new Error(`HTTP ${r.status}`);
+    return await r.text();
+  } finally {
+    clearTimeout(t);
+  }
+}
+
+function stripHtml(s) {
+  if (!s) return '';
+  return String(s).replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim().slice(0, 400);
+}
+
+function extractItems(xml, feed) {
+  const parsed = parser.parse(xml);
+  const rssItems = parsed?.rss?.channel?.item;
+  if (rssItems) {
+    const arr = Array.isArray(rssItems) ? rssItems : [rssItems];
+    return arr.map((it) => ({
+      title: typeof it.title === 'string' ? it.title : it.title?.['#text'] || '',
+      link: typeof it.link === 'string' ? it.link : it.link?.['#text'] || '',
+      guid: typeof it.guid === 'string' ? it.guid : it.guid?.['#text'] || it.link || '',
+      pubDate: it.pubDate ? new Date(it.pubDate).toISOString() : null,
+      description: stripHtml(it.description || it['content:encoded'] || ''),
+      source: feed.name,
+    }));
+  }
+  const atomEntries = parsed?.feed?.entry;
+  if (atomEntries) {
+    const arr = Array.isArray(atomEntries) ? atomEntries : [atomEntries];
+    return arr.map((e) => {
+      const link = Array.isArray(e.link)
+        ? e.link[0]?.['@_href']
+        : e.link?.['@_href'] || e.link;
+      return {
+        title: typeof e.title === 'string' ? e.title : e.title?.['#text'] || '',
+        link: link || '',
+        guid: e.id || link || '',
+        pubDate: e.updated || e.published ? new Date(e.updated || e.published).toISOString() : null,
+        description: stripHtml(e.summary?.['#text'] || e.summary || e.content?.['#text'] || ''),
+        source: feed.name,
+      };
+    });
+  }
+  return [];
+}
+
+async function main() {
+  const feeds = JSON.parse(fs.readFileSync(FEEDS_FILE, 'utf8')).filter((f) => f.enabled);
+  if (feeds.length === 0) {
+    console.log('no enabled feeds — writing empty news.json');
+    fs.mkdirSync(DATA_DIR, { recursive: true });
+    fs.writeFileSync(OUT_FILE, JSON.stringify({ updatedAt: new Date().toISOString(), items: [] }, null, 2));
+    return;
+  }
+
+  const all = [];
+  for (const feed of feeds) {
+    try {
+      const xml = await fetchFeed(feed.url, TIMEOUT_MS);
+      const items = extractItems(xml, feed);
+      const max = feed.max || 20;
+      all.push(...items.slice(0, max));
+      console.log(`OK ${feed.name}: ${items.length} (kept ${Math.min(items.length, max)})`);
+    } catch (e) {
+      console.warn(`FAIL ${feed.name}: ${e.message}`);
+    }
+  }
+
+  const seen = new Set();
+  const deduped = [];
+  for (const it of all) {
+    const key = it.guid || it.link;
+    if (!key || seen.has(key)) continue;
+    seen.add(key);
+    deduped.push(it);
+  }
+  deduped.sort((a, b) => (b.pubDate || '').localeCompare(a.pubDate || ''));
+
+  const out = { updatedAt: new Date().toISOString(), items: deduped.slice(0, HARD_CAP) };
+  fs.mkdirSync(DATA_DIR, { recursive: true });
+  fs.writeFileSync(OUT_FILE, JSON.stringify(out, null, 2));
+  console.log(`-> ${OUT_FILE}: ${out.items.length} items`);
+}
+
+main().catch((e) => {
+  console.error(e);
+  process.exit(1);
+});