init: Vite+React+Tailwind v2 site with HTML content from WP, RSS feed, external feed aggregator, prerender

This commit is contained in:
striker
2026-05-21 01:11:26 +03:00
commit 76cdeb8b48
42 changed files with 6317 additions and 0 deletions

73
scripts/build-rss.js Normal file
View File

@@ -0,0 +1,73 @@
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Paths are resolved relative to this script: ROOT is the parent of the script's directory, DIST is ROOT/dist.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const DIST = path.join(ROOT, 'dist');
// Channel metadata interpolated into the feed document and into item URLs.
const SITE = 'https://pushkinohistory.ru';
const TITLE = 'История города Пушкино';
const DESC = 'Прошлое, настоящее, будущее города Пушкино.';
// Post records are read synchronously at module evaluation; a missing or malformed file throws here.
const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8'));
/**
 * Escapes the five XML special characters so a value can be placed in
 * element text or inside a quoted attribute.
 *
 * @param {*} s - Value to escape; coerced with String().
 * @returns {string} The escaped text.
 */
const escapeXml = (s) =>
  String(s)
    // Ampersands are handled first so the entities produced below are not escaped again.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
/**
 * Wraps a value in a CDATA section.
 *
 * @param {*} s - Value to wrap; coerced with String().
 * @returns {string} The CDATA-wrapped text.
 */
const cdata = (s) => {
  const text = String(s);
  // A literal "]]>" would terminate the section early, so it is split across two adjacent sections.
  const safe = text.replace(/]]>/g, ']]]]><![CDATA[>');
  return `<![CDATA[${safe}]]>`;
};
/**
 * Formats a "YYYY-MM-DD HH:MM:SS" timestamp for <pubDate>.
 * The first space becomes "T" and a fixed +03:00 offset is appended before
 * parsing; the result is rendered with Date#toUTCString.
 *
 * @param {string} s - Timestamp text.
 * @returns {string} The UTC date string ("Invalid Date" when parsing fails).
 */
const rfc2822 = (s) => new Date(`${s.replace(' ', 'T')}+03:00`).toUTCString();
/**
 * Rewrites root-relative /uploads/ references in src="" and href=""
 * attributes to absolute URLs on SITE.
 *
 * @param {string} html - Post HTML.
 * @returns {string} HTML with absolute upload URLs.
 */
const absoluteImages = (html) => {
  const uploadAttribute = /(src|href)="\/uploads\//g;
  // $1 re-inserts whichever attribute name matched.
  return html.replace(uploadAttribute, `$1="${SITE}/uploads/`);
};
// Renders every post as one RSS <item> fragment; the fragments are joined with newlines.
const items = posts.map((p) => {
// /uploads/ references are made absolute before the HTML is embedded in the feed.
const html = absoluteImages(p.html);
// A non-empty excerpt is used as-is; otherwise plain text is derived from the HTML
// (tags replaced by spaces, whitespace collapsed, first 500 characters kept).
const description = p.excerpt
? p.excerpt
: html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim().slice(0, 500);
const url = `${SITE}/${p.slug}/`;
// NOTE(review): url is interpolated without escapeXml — presumably slugs never contain XML special characters; confirm.
return ` <item>
<title>${escapeXml(p.title)}</title>
<link>${url}</link>
<guid isPermaLink="true">${url}</guid>
<pubDate>${rfc2822(p.date)}</pubDate>
<dc:creator>${cdata('История города Пушкино')}</dc:creator>
${(p.categories || []).map((c) => `<category>${escapeXml(c)}</category>`).join('\n ')}
<description>${cdata(description)}</description>
<content:encoded>${cdata(html)}</content:encoded>
</item>`;
}).join('\n');
// Build timestamp, formatted by Date#toUTCString.
const lastBuild = new Date().toUTCString();
// Complete feed document: XML declaration, the namespaces used by the items
// (content, dc, atom), channel metadata, then the rendered items.
// NOTE(review): the atom self link points at /feed/ while the file is written as feed.xml —
// presumably the web server maps one to the other; confirm.
const rss = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>${escapeXml(TITLE)}</title>
<link>${SITE}/</link>
<atom:link href="${SITE}/feed/" rel="self" type="application/rss+xml" />
<description>${escapeXml(DESC)}</description>
<language>ru-RU</language>
<lastBuildDate>${lastBuild}</lastBuildDate>
<ttl>60</ttl>
${items}
</channel>
</rss>
`;
// Create dist/ when it does not exist yet, then write the feed and report the item count.
fs.mkdirSync(DIST, { recursive: true });
fs.writeFileSync(path.join(DIST, 'feed.xml'), rss);
console.log(`rss: ${posts.length} items → dist/feed.xml`);

58
scripts/build-sitemap.js Normal file
View File

@@ -0,0 +1,58 @@
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Paths are resolved relative to this script: ROOT is the parent of the script's directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const DIST = path.join(ROOT, 'dist');
const SITE = 'https://pushkinohistory.ru';

// Content records, read synchronously at module evaluation.
const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8'));
const pages = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/pages.json'), 'utf8'));

// Date part (YYYY-MM-DD) of the current UTC time; used as lastmod for the two index URLs.
const today = new Date().toISOString().slice(0, 10);

/**
 * Builds one sitemap entry for a content record.
 * lastmod is the first 10 characters of the record's date field.
 */
const entryFor = (record, priority, changefreq) => ({
  loc: `${SITE}/${record.slug}/`,
  lastmod: record.date.slice(0, 10),
  priority,
  changefreq,
});

// Order: home page, news index, every page, then every post.
const urls = [
  { loc: `${SITE}/`, lastmod: today, priority: '1.0', changefreq: 'weekly' },
  { loc: `${SITE}/news/`, lastmod: today, priority: '0.8', changefreq: 'hourly' },
  ...pages.map((page) => entryFor(page, '0.7', 'yearly')),
  ...posts.map((post) => entryFor(post, '0.6', 'monthly')),
];
// Sitemap document: one <url> element per entry in urls, joined with newlines.
// NOTE(review): loc values are interpolated without XML escaping — presumably slugs never contain "&" or "<"; confirm.
const xml = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
${urls.map((u) => ` <url>
<loc>${u.loc}</loc>
<lastmod>${u.lastmod}</lastmod>
<changefreq>${u.changefreq}</changefreq>
<priority>${u.priority}</priority>
</url>`).join('\n')}
</urlset>
`;
// Create dist/ when it does not exist yet and write the sitemap.
fs.mkdirSync(DIST, { recursive: true });
fs.writeFileSync(path.join(DIST, 'sitemap.xml'), xml);
// robots.txt allows every user agent and points at the sitemap written above.
const robots = `User-agent: *
Allow: /
Sitemap: ${SITE}/sitemap.xml
`;
fs.writeFileSync(path.join(DIST, 'robots.txt'), robots);
console.log(`sitemap: ${urls.length} URLs → dist/sitemap.xml`);

22
scripts/build-slugs.js Normal file
View File

@@ -0,0 +1,22 @@
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
// Paths are resolved relative to this script: ROOT is the parent of the script's directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const DIST = path.join(ROOT, 'dist');

// Content records, read synchronously at module evaluation.
const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8'));
const pages = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/pages.json'), 'utf8'));

// Distinct category slugs in first-seen order; posts without categorySlugs contribute nothing.
const cats = new Set(posts.flatMap((post) => post.categorySlugs || []));

// Route order: home, news index, posts, pages, category listings.
const routes = [
  '/',
  '/news/',
  ...posts.map((post) => `/${post.slug}/`),
  ...pages.map((page) => `/${page.slug}/`),
  ...Array.from(cats, (slug) => `/cat/${slug}/`),
];

// Create dist/ when it does not exist yet and write the route list as pretty-printed JSON.
fs.mkdirSync(DIST, { recursive: true });
fs.writeFileSync(path.join(DIST, 'routes.json'), JSON.stringify(routes, null, 2));
console.log(`routes: ${routes.length} → dist/routes.json`);

97
scripts/convert_posts.py Normal file
View File

@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""Convert WP posts-raw.jsonl → src/content/{posts,pages}.json with image URL rewrite + translit slugs."""
import json
import re
import sys
from pathlib import Path
from urllib.parse import unquote
# ROOT is two levels above this file (the parent of the directory containing the script).
ROOT = Path(__file__).resolve().parent.parent
# Input: one JSON object per line.
RAW = ROOT / "scripts" / "posts-raw.jsonl"
# Output directory for posts.json / pages.json; created at import time if missing.
OUT_DIR = ROOT / "src" / "content"
OUT_DIR.mkdir(parents=True, exist_ok=True)
# Lower-case Cyrillic letter -> Latin replacement. The hard and soft signs map
# to the empty string, i.e. they are removed.
TRANSLIT = {
    'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo',
    'ж': 'zh', 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm',
    'н': 'n', 'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u',
    'ф': 'f', 'х': 'h', 'ц': 'ts', 'ч': 'ch', 'ш': 'sh', 'щ': 'sch',
    'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu', 'я': 'ya',
}


def slugify_ru(s: str) -> str:
    """Turn a (possibly percent-encoded) slug into a transliterated slug.

    The input is percent-decoded and lower-cased. Letters found in TRANSLIT
    are replaced, other alphanumerics and '-'/'_' are kept, spaces and tabs
    become '-', and every other character is dropped. Runs of '-' collapse to
    one and leading/trailing '-' are stripped.

    Returns 'untitled' when nothing is left.
    """
    pieces = []
    for ch in unquote(s).lower():
        mapped = TRANSLIT.get(ch)
        if mapped is not None:
            # Includes the empty replacements for the hard/soft signs.
            pieces.append(mapped)
            continue
        if ch.isalnum() or ch in '-_':
            pieces.append(ch)
        elif ch in ' \t':
            pieces.append('-')
    collapsed = re.sub(r'-+', '-', ''.join(pieces))
    slug = collapsed.strip('-')
    return slug if slug else 'untitled'
# Absolute WP upload URL (http or https, optional "www."): two path segments
# after /uploads/ are skipped and the remainder, up to a quote, whitespace or
# ")", is captured.
UPLOAD_RE = re.compile(r'https?://(?:www\.)?pushkinohistory\.ru/wp-content/uploads/[^/]+/[^/]+/([^"\'\s)]+)')


def rewrite_uploads(html: str) -> str:
    """Replace absolute WP upload URLs in ``html`` with ``/uploads/<captured part>``."""
    rewritten, _replacements = UPLOAD_RE.subn(r'/uploads/\1', html)
    return rewritten
# Record id -> list of category slugs. main() looks ids up with .get(), so an
# id missing from this table gets an empty list.
CATEGORIES = {
20: [], 23: [], 73: [], 94: [], # pages — no category
137: ['main'], 139: ['main'],
142: ['main'], 145: ['main', 'today', 'tech'],
158: ['main', 'tech'], 226: ['main'], 235: ['main'],
}
# Category slug -> display name. Every slug used in CATEGORIES must have an
# entry here, because main() indexes this dict directly.
CATEGORY_NAMES = {
'main': 'Главная',
'today': 'Настоящее',
'tech': 'Техническое',
}
def main() -> None:
    """Convert the raw JSONL export into posts.json and pages.json.

    Each non-blank line of RAW is parsed as JSON. Rows whose ``type`` is
    ``'post'`` become posts; every other row becomes a page. Posts are sorted
    by ``date`` descending, pages by ``id`` ascending. Both lists are written
    to OUT_DIR as indented UTF-8 JSON and a summary is printed.
    """
    posts: list[dict] = []
    pages: list[dict] = []

    with RAW.open(encoding='utf-8') as raw_file:
        for raw_line in raw_file:
            record_text = raw_line.strip()
            if not record_text:
                continue
            row = json.loads(record_text)
            old_slug = row['name']
            new_slug = slugify_ru(old_slug)
            html = rewrite_uploads(row['content'])
            category_slugs = CATEGORIES.get(row['id'], [])
            item = {
                'id': row['id'],
                'slug': new_slug,
                'oldSlug': old_slug,
                'title': row['title'],
                'date': row['date'],
                'excerpt': row.get('excerpt') or '',
                'html': html,
                'categories': [CATEGORY_NAMES[c] for c in category_slugs],
                'categorySlugs': category_slugs,
            }
            target = posts if row['type'] == 'post' else pages
            target.append(item)

    posts.sort(key=lambda p: p['date'], reverse=True)
    pages.sort(key=lambda p: p['id'])

    def dump(filename: str, records: list) -> None:
        # Non-ASCII text is written as-is rather than as \u escapes.
        payload = json.dumps(records, ensure_ascii=False, indent=2)
        (OUT_DIR / filename).write_text(payload, encoding='utf-8')

    dump('posts.json', posts)
    dump('pages.json', pages)

    print(f'posts: {len(posts)} → src/content/posts.json')
    print(f'pages: {len(pages)} → src/content/pages.json')
    for p in posts:
        # The old slug is truncated to 40 characters in the log line.
        print(f" post: /{p['slug']}/ (was: {p['oldSlug'][:40]}{'...' if len(p['oldSlug']) > 40 else ''}) — {p['title']}")
    for p in pages:
        print(f" page: /{p['slug']}/ — {p['title']}")


if __name__ == '__main__':
    main()

53
scripts/prerender.js Normal file
View File

@@ -0,0 +1,53 @@
import fs from 'node:fs';
import path from 'node:path';
import http from 'node:http';
import { fileURLToPath } from 'node:url';
import express from 'express';
import puppeteer from 'puppeteer';
// Paths are resolved relative to this script: ROOT is the parent of the script's directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const DIST = path.join(ROOT, 'dist');
// Route list to prerender; read from dist/routes.json, so that file must exist before this script runs.
const routes = JSON.parse(fs.readFileSync(path.join(DIST, 'routes.json'), 'utf8'));
// Local server: files in dist/ are served statically and every other GET falls back to dist/index.html.
const app = express();
app.use(express.static(DIST));
app.get('*', (_req, res) => res.sendFile(path.join(DIST, 'index.html')));
const server = http.createServer(app);
/**
 * Prerenders every route in `routes` to dist/<route>/index.html.
 *
 * Starts the local server on an OS-assigned port, opens each route in a
 * headless browser, and writes the resulting page HTML to disk. The page,
 * the browser and the server are released in `finally` blocks so they are
 * closed even when a step throws.
 *
 * @returns {Promise<void>} Resolves once all routes are written and everything is closed.
 */
async function start() {
  await new Promise((resolve, reject) => {
    // Without an error listener a failed listen() would leave this promise pending forever.
    server.once('error', reject);
    server.listen(0, resolve);
  });
  try {
    const { port } = server.address();
    const baseUrl = `http://127.0.0.1:${port}`;
    const launchOpts = { headless: 'new', args: ['--no-sandbox', '--disable-setuid-sandbox'] };
    if (process.env.PUPPETEER_EXECUTABLE_PATH) {
      launchOpts.executablePath = process.env.PUPPETEER_EXECUTABLE_PATH;
    }
    const browser = await puppeteer.launch(launchOpts);
    try {
      for (const route of routes) {
        const page = await browser.newPage();
        try {
          const url = `${baseUrl}${route}`;
          try {
            await page.goto(url, { waitUntil: 'networkidle0', timeout: 15000 });
          } catch (e) {
            // Best effort: a navigation error (e.g. timeout) is logged and whatever the page holds is still saved.
            console.warn(`prerender warn ${route}: ${e.message}`);
          }
          const html = await page.content();
          const outDir = path.join(DIST, route);
          fs.mkdirSync(outDir, { recursive: true });
          fs.writeFileSync(path.join(outDir, 'index.html'), html);
          console.log(`prerender: ${route}`);
        } finally {
          await page.close();
        }
      }
    } finally {
      await browser.close();
    }
  } finally {
    server.close();
  }
}

start().catch((e) => {
  console.error(e);
  process.exit(1);
});

View File

@@ -0,0 +1,125 @@
#!/usr/bin/env node
/**
* Тянет внешние RSS-фиды из src/content/feeds.json, дедуплицирует по guid/link,
* пишет агрегированный news.json в DATA_DIR (default: ./data).
* Запускается по cron на хосте.
*/
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { XMLParser } from 'fast-xml-parser';
// Paths are resolved relative to this script: ROOT is the parent of the script's directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');
const FEEDS_FILE = path.join(ROOT, 'src/content/feeds.json');
// The output directory can be overridden with the DATA_DIR environment variable; it defaults to ROOT/data.
const DATA_DIR = process.env.DATA_DIR || path.join(ROOT, 'data');
const OUT_FILE = path.join(DATA_DIR, 'news.json');
// Per-feed timeout in milliseconds, passed to fetchFeed by main.
const TIMEOUT_MS = 15000;
// Upper bound on the number of items written to news.json.
const HARD_CAP = 200;
// Attributes are kept and named with an "@_" prefix; text nodes are named "#text".
// extractItems reads both kinds of key from the parsed result.
const parser = new XMLParser({
ignoreAttributes: false,
attributeNamePrefix: '@_',
textNodeName: '#text',
});
/**
 * Downloads a feed and returns its body as text.
 *
 * The request is aborted when it has not completed (including reading the
 * body) within `timeoutMs`. The timer is always cleared afterwards.
 *
 * @param {string} url - Feed URL.
 * @param {number} timeoutMs - Abort deadline in milliseconds.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status>` for a non-OK response; fetch/abort errors propagate unchanged.
 */
async function fetchFeed(url, timeoutMs) {
  const controller = new AbortController();
  const abortTimer = setTimeout(() => controller.abort(), timeoutMs);
  const requestInit = {
    signal: controller.signal,
    headers: { 'User-Agent': 'pushkinohistory-ru-v2 RSS aggregator' },
  };
  try {
    const response = await fetch(url, requestInit);
    if (response.ok) {
      return await response.text();
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    clearTimeout(abortTimer);
  }
}
/**
 * Returns the text of a parsed XML node as a string.
 * Handles plain strings, parsed numbers/booleans, objects carrying their text
 * under "#text" (elements with attributes), and repeated elements (first one wins).
 */
function nodeText(node) {
  if (node == null) return '';
  if (Array.isArray(node)) return nodeText(node[0]);
  if (typeof node === 'object') return nodeText(node['#text']);
  return String(node);
}

/**
 * Converts a feed date node to an ISO 8601 string, or null when the node is
 * missing or unparsable (Date#toISOString would throw on an invalid date).
 */
function toIsoDate(value) {
  const text = nodeText(value);
  if (!text) return null;
  const time = Date.parse(text);
  return Number.isNaN(time) ? null : new Date(time).toISOString();
}

/**
 * Picks the article URL from an Atom <link> node (single or repeated).
 * A link with an href and rel="alternate" (or no rel, which Atom defines as
 * alternate) is preferred; otherwise the first link is used.
 */
function atomLinkHref(link) {
  const links = Array.isArray(link) ? link : [link];
  const isAlternate = (l) =>
    typeof l === 'object' &&
    l !== null &&
    Boolean(l['@_href']) &&
    (l['@_rel'] === undefined || l['@_rel'] === 'alternate');
  const chosen = links.find(isAlternate) ?? links[0];
  if (typeof chosen === 'string') return chosen;
  return nodeText(chosen?.['@_href']);
}

/**
 * Parses a feed document and normalizes its entries.
 *
 * Supports RSS 2.0 (rss > channel > item) and Atom (feed > entry). Every
 * field of a returned item is a string, except `pubDate`, which is an ISO
 * string or null. A malformed date on one entry yields `pubDate: null` for
 * that entry instead of failing the whole feed.
 *
 * @param {string} xml - Feed document text.
 * @param {{name: string}} feed - Feed config; `name` is copied to each item's `source`.
 * @returns {Array<{title: string, link: string, guid: string, pubDate: (string|null), description: string, source: string}>}
 *   Normalized items; empty when the document is neither RSS 2.0 nor Atom.
 */
function extractItems(xml, feed) {
  const parsed = parser.parse(xml);
  // RSS 2.0
  const rssItems = parsed?.rss?.channel?.item;
  if (rssItems) {
    const arr = Array.isArray(rssItems) ? rssItems : [rssItems];
    return arr.map((it) => {
      const link = nodeText(it.link);
      return {
        title: nodeText(it.title),
        link,
        guid: nodeText(it.guid) || link,
        pubDate: toIsoDate(it.pubDate),
        description: stripHtml(nodeText(it.description) || nodeText(it['content:encoded'])),
        source: feed.name,
      };
    });
  }
  // Atom
  const atomEntries = parsed?.feed?.entry;
  if (atomEntries) {
    const arr = Array.isArray(atomEntries) ? atomEntries : [atomEntries];
    return arr.map((e) => {
      const link = atomLinkHref(e.link);
      return {
        title: nodeText(e.title),
        link,
        guid: nodeText(e.id) || link,
        // "updated" wins over "published" when both are present.
        pubDate: toIsoDate(e.updated || e.published),
        description: stripHtml(nodeText(e.summary) || nodeText(e.content)),
        source: feed.name,
      };
    });
  }
  return [];
}
/**
 * Reduces an HTML fragment to a short plain-text snippet.
 * Tags are replaced by spaces, whitespace runs collapse to one space, the
 * result is trimmed and cut to 400 characters.
 *
 * @param {*} s - HTML text; any falsy value yields ''.
 * @returns {string} Plain text, at most 400 characters long.
 */
function stripHtml(s) {
  if (!s) return '';
  const withoutTags = String(s).replace(/<[^>]+>/g, ' ');
  const collapsed = withoutTags.replace(/\s+/g, ' ').trim();
  return collapsed.slice(0, 400);
}
/**
 * Fetches every enabled feed listed in FEEDS_FILE, merges the items, and writes news.json.
 *
 * Feeds are fetched one after another; a feed that fails is logged with
 * console.warn and skipped. Each feed contributes at most `feed.max` items
 * (20 when unset or falsy). Items are de-duplicated by guid (falling back to
 * link), sorted by pubDate descending with undated items last, and capped at
 * HARD_CAP.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const enabledFeeds = JSON.parse(fs.readFileSync(FEEDS_FILE, 'utf8')).filter((f) => f.enabled);
  if (enabledFeeds.length === 0) {
    console.log('no enabled feeds — writing empty news.json');
    fs.mkdirSync(DATA_DIR, { recursive: true });
    fs.writeFileSync(OUT_FILE, JSON.stringify({ updatedAt: new Date().toISOString(), items: [] }, null, 2));
    return;
  }

  const collected = [];
  for (const feed of enabledFeeds) {
    try {
      const xml = await fetchFeed(feed.url, TIMEOUT_MS);
      const parsedItems = extractItems(xml, feed);
      const limit = feed.max || 20;
      collected.push(...parsedItems.slice(0, limit));
      console.log(`${feed.name}: ${parsedItems.length} items (kept ${Math.min(parsedItems.length, limit)})`);
    } catch (err) {
      console.warn(`${feed.name}: ${err.message}`);
    }
  }

  // The first occurrence of a key wins; items with neither guid nor link are dropped.
  const seenKeys = new Set();
  const unique = collected.filter((item) => {
    const key = item.guid || item.link;
    if (!key || seenKeys.has(key)) return false;
    seenKeys.add(key);
    return true;
  });

  // Dates are compared as strings; a missing date compares as '' and therefore sorts last.
  unique.sort((a, b) => (b.pubDate || '').localeCompare(a.pubDate || ''));

  const out = {
    updatedAt: new Date().toISOString(),
    items: unique.slice(0, HARD_CAP),
  };
  fs.mkdirSync(DATA_DIR, { recursive: true });
  fs.writeFileSync(OUT_FILE, JSON.stringify(out, null, 2));
  console.log(`${OUT_FILE}: ${out.items.length} items`);
}

main().catch((e) => {
  console.error(e);
  process.exit(1);
});