init: Vite+React+Tailwind v2 site with HTML content from WP, RSS feed, external feed aggregator, prerender
This commit is contained in:
73
scripts/build-rss.js
Normal file
73
scripts/build-rss.js
Normal file
@@ -0,0 +1,73 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const ROOT = path.resolve(__dirname, '..');
|
||||
const DIST = path.join(ROOT, 'dist');
|
||||
|
||||
const SITE = 'https://pushkinohistory.ru';
|
||||
const TITLE = 'История города Пушкино';
|
||||
const DESC = 'Прошлое, настоящее, будущее города Пушкино.';
|
||||
|
||||
const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8'));
|
||||
|
||||
/**
 * Escape the five XML-reserved characters so a value can be placed in
 * element text or in a quoted attribute.
 *
 * `&` is handled first so that the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param {*} s - Value to escape; coerced with `String()`.
 * @returns {string} The escaped text.
 */
const escapeXml = (s) =>
  String(s)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
|
||||
|
||||
// Wrap a value in a CDATA section. A literal "]]>" inside the value would end
// the section early, so every occurrence is split across two adjacent sections.
const cdata = (s) => {
  const safe = String(s).split(']]>').join(']]]]><![CDATA[>');
  return `<![CDATA[${safe}]]>`;
};
|
||||
|
||||
/**
 * Format a post timestamp for an RSS `<pubDate>` element.
 *
 * A timestamp without an explicit zone is interpreted as UTC+03:00; one that
 * already ends in `Z` or `±hh:mm` keeps its own zone instead of getting a
 * second offset appended (which would make it unparsable).
 *
 * @param {string} s - Timestamp such as `2019-05-01 12:30:00`.
 * @returns {string} The instant as produced by `Date.prototype.toUTCString()`.
 * @throws {RangeError} If the value cannot be parsed, so that a literal
 *   "Invalid Date" never ends up in the generated feed.
 */
const rfc2822 = (s) => {
  // "YYYY-MM-DD hh:mm:ss" -> "YYYY-MM-DDThh:mm:ss" so the ISO parser accepts it.
  const iso = String(s).trim().replace(' ', 'T');
  const hasZone = /(?:Z|[+-]\d{2}:\d{2})$/.test(iso);
  const d = new Date(hasZone ? iso : `${iso}+03:00`);
  if (Number.isNaN(d.getTime())) {
    throw new RangeError(`rfc2822: cannot parse date ${JSON.stringify(s)}`);
  }
  return d.toUTCString();
};
|
||||
|
||||
/**
 * Turn site-relative `/uploads/...` references into absolute URLs so the
 * markup still resolves when it is shown outside the site (e.g. in a feed
 * reader).
 *
 * Handles double-quoted `src` / `href` attributes and every candidate inside
 * a double-quoted `srcset` attribute.
 *
 * @param {string} html - HTML fragment.
 * @param {string} [site=SITE] - Origin to prefix, without a trailing slash.
 * @returns {string} The fragment with upload URLs made absolute.
 */
const absoluteImages = (html, site = SITE) =>
  html
    .replace(/(src|href)="\/uploads\//g, `$1="${site}/uploads/`)
    // srcset holds a comma-separated list of "url descriptor" candidates;
    // prefix each one that starts with /uploads/.
    .replace(/srcset="([^"]*)"/g, (_match, candidates) =>
      `srcset="${candidates.replace(/(^|,\s*)\/uploads\//g, `$1${site}/uploads/`)}"`);
|
||||
|
||||
// One RSS <item> per entry of posts.json, joined into a single string that is
// spliced into the channel below.
const items = posts
  .map((p) => {
    const html = absoluteImages(p.html);
    // Prefer the stored excerpt; otherwise derive one from the body by
    // stripping tags, collapsing whitespace and keeping the first 500 chars.
    const description = p.excerpt
      ? p.excerpt
      : html.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim().slice(0, 500);
    // Escaped because it is written as element text in <link> and <guid>.
    const url = escapeXml(`${SITE}/${p.slug}/`);
    const categories = (p.categories || [])
      .map((c) => `<category>${escapeXml(c)}</category>`)
      .join('\n      ');
    return `    <item>
      <title>${escapeXml(p.title)}</title>
      <link>${url}</link>
      <guid isPermaLink="true">${url}</guid>
      <pubDate>${rfc2822(p.date)}</pubDate>
      <dc:creator>${cdata('История города Пушкино')}</dc:creator>
      ${categories}
      <description>${cdata(description)}</description>
      <content:encoded>${cdata(html)}</content:encoded>
    </item>`;
  })
  .join('\n');
|
||||
|
||||
// Timestamp for <lastBuildDate>: the moment this script runs.
const lastBuild = new Date().toUTCString();

// The complete RSS 2.0 document. The XML declaration must be the very first
// characters of the output, so the template starts on the opening backtick.
const rss = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
  xmlns:content="http://purl.org/rss/1.0/modules/content/"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>${escapeXml(TITLE)}</title>
    <link>${SITE}/</link>
    <atom:link href="${SITE}/feed/" rel="self" type="application/rss+xml" />
    <description>${escapeXml(DESC)}</description>
    <language>ru-RU</language>
    <lastBuildDate>${lastBuild}</lastBuildDate>
    <ttl>60</ttl>
${items}
  </channel>
</rss>
`;

// Write the feed into the build output directory, creating it if needed.
fs.mkdirSync(DIST, { recursive: true });
fs.writeFileSync(path.join(DIST, 'feed.xml'), rss);
console.log(`rss: ${posts.length} items → dist/feed.xml`);
|
||||
58
scripts/build-sitemap.js
Normal file
58
scripts/build-sitemap.js
Normal file
@@ -0,0 +1,58 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const ROOT = path.resolve(__dirname, '..');
|
||||
const DIST = path.join(ROOT, 'dist');
|
||||
|
||||
const SITE = 'https://pushkinohistory.ru';
|
||||
|
||||
const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8'));
|
||||
const pages = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/pages.json'), 'utf8'));
|
||||
|
||||
// Build date (UTC calendar day, YYYY-MM-DD) used as <lastmod> for the home
// page and the news index.
const today = new Date().toISOString().slice(0, 10);

// Sitemap entry for one content item; <lastmod> is the first ten characters
// of its `date` field.
const contentEntry = (item, priority, changefreq) => ({
  loc: `${SITE}/${item.slug}/`,
  lastmod: item.date.slice(0, 10),
  priority,
  changefreq,
});

// Order: fixed index routes, then pages, then posts.
const urls = [
  { loc: `${SITE}/`, lastmod: today, priority: '1.0', changefreq: 'weekly' },
  { loc: `${SITE}/news/`, lastmod: today, priority: '0.8', changefreq: 'hourly' },
  ...pages.map((p) => contentEntry(p, '0.7', 'yearly')),
  ...posts.map((p) => contentEntry(p, '0.6', 'monthly')),
];

const xml = `<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
${urls.map((u) => `  <url>
    <loc>${u.loc}</loc>
    <lastmod>${u.lastmod}</lastmod>
    <changefreq>${u.changefreq}</changefreq>
    <priority>${u.priority}</priority>
  </url>`).join('\n')}
</urlset>
`;

fs.mkdirSync(DIST, { recursive: true });
fs.writeFileSync(path.join(DIST, 'sitemap.xml'), xml);

// robots.txt: allow everything and point crawlers at the sitemap written above.
const robots = `User-agent: *
Allow: /

Sitemap: ${SITE}/sitemap.xml
`;
fs.writeFileSync(path.join(DIST, 'robots.txt'), robots);

console.log(`sitemap: ${urls.length} URLs → dist/sitemap.xml`);
|
||||
22
scripts/build-slugs.js
Normal file
22
scripts/build-slugs.js
Normal file
@@ -0,0 +1,22 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const ROOT = path.resolve(__dirname, '..');
|
||||
const DIST = path.join(ROOT, 'dist');
|
||||
|
||||
const posts = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/posts.json'), 'utf8'));
|
||||
const pages = JSON.parse(fs.readFileSync(path.join(ROOT, 'src/content/pages.json'), 'utf8'));
|
||||
|
||||
// Every category slug used by at least one post.
const cats = new Set();
for (const p of posts) {
  for (const s of p.categorySlugs || []) cats.add(s);
}

// A Set keeps first-seen order and drops repeats, so a slug shared by a post
// and a page yields a single route instead of being listed twice.
const routeSet = new Set(['/', '/news/']);
for (const p of posts) routeSet.add(`/${p.slug}/`);
for (const p of pages) routeSet.add(`/${p.slug}/`);
for (const c of cats) routeSet.add(`/cat/${c}/`);
const routes = [...routeSet];

fs.mkdirSync(DIST, { recursive: true });
fs.writeFileSync(path.join(DIST, 'routes.json'), JSON.stringify(routes, null, 2));
console.log(`routes: ${routes.length} → dist/routes.json`);
|
||||
97
scripts/convert_posts.py
Normal file
97
scripts/convert_posts.py
Normal file
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Convert WP posts-raw.jsonl → src/content/{posts,pages}.json with image URL rewrite + translit slugs."""
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from urllib.parse import unquote
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
RAW = ROOT / "scripts" / "posts-raw.jsonl"
|
||||
OUT_DIR = ROOT / "src" / "content"
|
||||
OUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Lower-case Cyrillic -> Latin transliteration table. The hard and soft signs
# map to the empty string, i.e. they are dropped from the slug.
TRANSLIT = {
    'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo',
    'ж': 'zh', 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm',
    'н': 'n', 'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u',
    'ф': 'f', 'х': 'h', 'ц': 'ts', 'ч': 'ch', 'ш': 'sh', 'щ': 'sch',
    'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu', 'я': 'ya',
}


def slugify_ru(s: str) -> str:
    """Build an ASCII slug from a (possibly percent-encoded) Russian string.

    The input is percent-decoded and lower-cased. Cyrillic letters are
    transliterated through ``TRANSLIT``; ASCII letters, digits, ``-`` and
    ``_`` are kept; any whitespace becomes ``-``; every other character is
    dropped. Runs of ``-`` are collapsed and leading/trailing ``-`` removed.

    Args:
        s: Source text or percent-encoded slug.

    Returns:
        The slug, or ``'untitled'`` when nothing usable is left.
    """
    out = []
    for ch in unquote(s).lower():
        if ch in TRANSLIT:
            out.append(TRANSLIT[ch])
        elif ch in '-_' or (ch.isascii() and ch.isalnum()):
            # ASCII only: str.isalnum() alone is also true for characters such
            # as 'é' or '½', which would otherwise leak into the slug.
            out.append(ch)
        elif ch.isspace():
            # Any whitespace (space, tab, newline, NBSP, ...) separates words.
            out.append('-')
    res = re.sub(r'-+', '-', ''.join(out)).strip('-')
    return res or 'untitled'
|
||||
|
||||
# Absolute URL of a file under wp-content/uploads on this site (http or https,
# with or without "www."). The two path segments right after uploads/ are
# matched but not captured (presumably WordPress' year/month folders); group 1
# is everything after them up to the next quote, whitespace or ")".
UPLOAD_RE = re.compile(r'https?://(?:www\.)?pushkinohistory\.ru/wp-content/uploads/[^/]+/[^/]+/([^"\'\s)]+)')


def rewrite_uploads(html: str) -> str:
    """Rewrite absolute WordPress upload URLs to site-relative ``/uploads/<file>``.

    Because the two directory levels after ``uploads/`` are discarded, files
    with the same name in different folders map to the same rewritten path.

    Args:
        html: HTML fragment taken from the WordPress export.

    Returns:
        The fragment with every matching URL replaced.
    """
    return UPLOAD_RE.sub(r'/uploads/\1', html)
|
||||
|
||||
CATEGORIES = {
|
||||
20: [], 23: [], 73: [], 94: [], # pages — no category
|
||||
137: ['main'], 139: ['main'],
|
||||
142: ['main'], 145: ['main', 'today', 'tech'],
|
||||
158: ['main', 'tech'], 226: ['main'], 235: ['main'],
|
||||
}
|
||||
|
||||
CATEGORY_NAMES = {
|
||||
'main': 'Главная',
|
||||
'today': 'Настоящее',
|
||||
'tech': 'Техническое',
|
||||
}
|
||||
|
||||
def main() -> None:
    """Convert the raw JSONL export into ``posts.json`` and ``pages.json``.

    Each non-empty line of ``RAW`` is one JSON object. Rows whose ``type`` is
    ``'post'`` go to ``posts.json`` (sorted by ``date``, newest first); all
    other rows go to ``pages.json`` (sorted by ``id``). For every row the slug
    is transliterated and upload URLs in the content are rewritten. A summary
    is printed to stdout; slug collisions are reported on stderr.
    """
    posts: list[dict] = []
    pages: list[dict] = []
    # slug -> id of the first row that produced it, to detect collisions.
    seen_slugs: dict[str, int] = {}
    with RAW.open(encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            row = json.loads(line)
            old_slug = row['name']
            # slugify_ru percent-decodes the value itself, so encoded and
            # plain slugs go through the same call.
            new_slug = slugify_ru(old_slug)
            if new_slug in seen_slugs:
                # Two items with one slug would share a URL; warn, don't abort.
                print(
                    f"warning: slug '{new_slug}' (id {row['id']}) is already "
                    f"used by id {seen_slugs[new_slug]}",
                    file=sys.stderr,
                )
            else:
                seen_slugs[new_slug] = row['id']
            html = rewrite_uploads(row['content'])
            category_slugs = CATEGORIES.get(row['id'], [])
            item = {
                'id': row['id'],
                'slug': new_slug,
                'oldSlug': old_slug,
                'title': row['title'],
                'date': row['date'],
                'excerpt': row.get('excerpt') or '',
                'html': html,
                'categories': [CATEGORY_NAMES[c] for c in category_slugs],
                'categorySlugs': category_slugs,
            }
            if row['type'] == 'post':
                posts.append(item)
            else:
                pages.append(item)
    posts.sort(key=lambda p: p['date'], reverse=True)
    pages.sort(key=lambda p: p['id'])
    (OUT_DIR / 'posts.json').write_text(
        json.dumps(posts, ensure_ascii=False, indent=2), encoding='utf-8'
    )
    (OUT_DIR / 'pages.json').write_text(
        json.dumps(pages, ensure_ascii=False, indent=2), encoding='utf-8'
    )
    print(f'posts: {len(posts)} → src/content/posts.json')
    print(f'pages: {len(pages)} → src/content/pages.json')
    for p in posts:
        print(f" post: /{p['slug']}/ (was: {p['oldSlug'][:40]}{'...' if len(p['oldSlug']) > 40 else ''}) — {p['title']}")
    for p in pages:
        print(f" page: /{p['slug']}/ — {p['title']}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
53
scripts/prerender.js
Normal file
53
scripts/prerender.js
Normal file
@@ -0,0 +1,53 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import http from 'node:http';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import express from 'express';
|
||||
import puppeteer from 'puppeteer';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const ROOT = path.resolve(__dirname, '..');
|
||||
const DIST = path.join(ROOT, 'dist');
|
||||
|
||||
const routes = JSON.parse(fs.readFileSync(path.join(DIST, 'routes.json'), 'utf8'));
|
||||
|
||||
// The untouched SPA shell, read once before anything is prerendered.
// Prerendering "/" overwrites dist/index.html, so reading the file lazily per
// request would hand the already-prerendered home page to every later route
// as its fallback document.
const shellHtml = fs.readFileSync(path.join(DIST, 'index.html'));

// Static files first; any other path falls back to the SPA shell.
const app = express();
app.use(express.static(DIST));
app.get('*', (_req, res) => res.type('html').send(shellHtml));
const server = http.createServer(app);

/**
 * Serve dist/ on an ephemeral local port, open every route from routes.json
 * in headless Chrome and save the rendered DOM to dist/<route>/index.html.
 *
 * A navigation failure (e.g. timeout) is logged as a warning and whatever the
 * page contains at that point is still written. The page, the browser and
 * the HTTP server are released even when a step throws.
 *
 * @returns {Promise<void>}
 */
async function start() {
  await new Promise((resolve, reject) => {
    // Without this a failed listen would leave the promise pending forever.
    server.once('error', reject);
    server.listen(0, resolve);
  });

  let browser;
  try {
    const { port } = server.address();
    const baseUrl = `http://127.0.0.1:${port}`;

    const launchOpts = { headless: 'new', args: ['--no-sandbox', '--disable-setuid-sandbox'] };
    if (process.env.PUPPETEER_EXECUTABLE_PATH) {
      launchOpts.executablePath = process.env.PUPPETEER_EXECUTABLE_PATH;
    }
    browser = await puppeteer.launch(launchOpts);

    for (const route of routes) {
      const page = await browser.newPage();
      try {
        const url = `${baseUrl}${route}`;
        try {
          await page.goto(url, { waitUntil: 'networkidle0', timeout: 15000 });
        } catch (e) {
          // Best effort: keep going and snapshot whatever has rendered.
          console.warn(`prerender warn ${route}: ${e.message}`);
        }
        const html = await page.content();
        const outDir = path.join(DIST, route);
        fs.mkdirSync(outDir, { recursive: true });
        fs.writeFileSync(path.join(outDir, 'index.html'), html);
        console.log(`prerender: ${route}`);
      } finally {
        await page.close();
      }
    }
  } finally {
    if (browser) await browser.close();
    server.close();
  }
}
|
||||
|
||||
start().catch((e) => {
|
||||
console.error(e);
|
||||
process.exit(1);
|
||||
});
|
||||
125
scripts/pull-external-rss.js
Normal file
125
scripts/pull-external-rss.js
Normal file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Тянет внешние RSS-фиды из src/content/feeds.json, дедуплицирует по guid/link,
|
||||
* пишет агрегированный news.json в DATA_DIR (default: ./data).
|
||||
* Запускается по cron на хосте.
|
||||
*/
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { XMLParser } from 'fast-xml-parser';
|
||||
|
||||
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
||||
const ROOT = path.resolve(__dirname, '..');
|
||||
const FEEDS_FILE = path.join(ROOT, 'src/content/feeds.json');
|
||||
const DATA_DIR = process.env.DATA_DIR || path.join(ROOT, 'data');
|
||||
const OUT_FILE = path.join(DATA_DIR, 'news.json');
|
||||
const TIMEOUT_MS = 15000;
|
||||
const HARD_CAP = 200;
|
||||
|
||||
const parser = new XMLParser({
|
||||
ignoreAttributes: false,
|
||||
attributeNamePrefix: '@_',
|
||||
textNodeName: '#text',
|
||||
});
|
||||
|
||||
/**
 * Download one feed as text, giving up after `timeoutMs`.
 *
 * The timer stays armed until the body has been read, so it bounds the whole
 * download. On timeout the request is aborted with an explicit reason so the
 * caller's log says what happened rather than a generic abort message.
 *
 * @param {string} url - Feed URL.
 * @param {number} timeoutMs - Time budget in milliseconds.
 * @returns {Promise<string>} The response body.
 * @throws {Error} On a non-2xx status (`HTTP <status>`), a network error or
 *   a timeout.
 */
async function fetchFeed(url, timeoutMs) {
  const ctl = new AbortController();
  const t = setTimeout(
    () => ctl.abort(new Error(`timed out after ${timeoutMs} ms`)),
    timeoutMs,
  );
  try {
    const r = await fetch(url, {
      signal: ctl.signal,
      headers: { 'User-Agent': 'pushkinohistory-ru-v2 RSS aggregator' },
    });
    if (!r.ok) throw new Error(`HTTP ${r.status}`);
    return await r.text();
  } finally {
    // Always disarm the timer so it cannot keep the process alive.
    clearTimeout(t);
  }
}
|
||||
|
||||
/**
 * Text content of a parsed XML node.
 *
 * Accepts a plain string, a number (the parser may hand back numbers for
 * numeric-looking text), an element object carrying its text under `#text`,
 * or an array of such nodes (first one wins). Anything else gives `''`.
 */
function nodeText(node) {
  if (node == null) return '';
  if (Array.isArray(node)) return nodeText(node[0]);
  if (typeof node === 'object') return nodeText(node['#text']);
  return String(node);
}

/** ISO-8601 string for a date node, or null when it is missing or unparsable. */
function toIsoDate(value) {
  const text = nodeText(value).trim();
  if (!text) return null;
  const d = new Date(text);
  // toISOString() throws RangeError on an invalid date; one bad timestamp
  // must not discard the whole feed.
  return Number.isNaN(d.getTime()) ? null : d.toISOString();
}

/** href of an Atom entry: the rel="alternate" (or rel-less) link, else the first link. */
function atomHref(link) {
  const links = Array.isArray(link) ? link : [link];
  const preferred =
    links.find(
      (l) => l !== null && typeof l === 'object' && (l['@_rel'] ?? 'alternate') === 'alternate',
    ) ?? links[0];
  return preferred !== null && typeof preferred === 'object'
    ? nodeText(preferred['@_href'])
    : nodeText(preferred);
}

/**
 * Parse a feed document and normalise its entries.
 *
 * Supports RSS 2.0 (`rss.channel.item`) and Atom (`feed.entry`); any other
 * document yields an empty list.
 *
 * @param {string} xml - Raw feed document.
 * @param {{name: string}} feed - Feed descriptor; `name` becomes `source`.
 * @returns {Array<{title: string, link: string, guid: string,
 *   pubDate: (string|null), description: string, source: string}>}
 */
function extractItems(xml, feed) {
  const parsed = parser.parse(xml);
  // A single child element is parsed as an object, several as an array.
  const asArray = (v) => (Array.isArray(v) ? v : [v]);

  // RSS 2.0
  const rssItems = parsed?.rss?.channel?.item;
  if (rssItems) {
    return asArray(rssItems).map((it) => {
      const link = nodeText(it.link);
      return {
        title: nodeText(it.title),
        link,
        guid: nodeText(it.guid) || link,
        pubDate: toIsoDate(it.pubDate),
        description: stripHtml(nodeText(it.description) || nodeText(it['content:encoded'])),
        source: feed.name,
      };
    });
  }

  // Atom
  const atomEntries = parsed?.feed?.entry;
  if (atomEntries) {
    return asArray(atomEntries).map((e) => {
      const link = atomHref(e.link);
      return {
        title: nodeText(e.title),
        link,
        guid: nodeText(e.id) || link,
        pubDate: toIsoDate(e.updated || e.published),
        description: stripHtml(nodeText(e.summary) || nodeText(e.content)),
        source: feed.name,
      };
    });
  }

  return [];
}
|
||||
|
||||
/**
 * Reduce an HTML fragment to a short plain-text summary.
 *
 * Tags are replaced by spaces, whitespace runs are collapsed, the result is
 * trimmed and cut to at most 400 UTF-16 code units. Entities are not decoded.
 *
 * @param {*} s - HTML text; any falsy value yields `''`.
 * @returns {string} Plain text, at most 400 code units long.
 */
function stripHtml(s) {
  if (!s) return '';
  const text = String(s).replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  const cut = text.slice(0, 400);
  // If the cut fell between the halves of a surrogate pair, drop the dangling
  // high surrogate instead of emitting a broken character.
  return /[\uD800-\uDBFF]$/.test(cut) ? cut.slice(0, -1) : cut;
}
|
||||
|
||||
/**
 * Write the aggregate to OUT_FILE.
 *
 * The JSON goes to a temporary file in the same directory and is then renamed
 * over the target, so a reader should never observe a half-written news.json.
 *
 * @param {Array<object>} items - Items to store.
 */
function writeNews(items) {
  fs.mkdirSync(DATA_DIR, { recursive: true });
  const tmpFile = `${OUT_FILE}.${process.pid}.tmp`;
  fs.writeFileSync(
    tmpFile,
    JSON.stringify({ updatedAt: new Date().toISOString(), items }, null, 2),
  );
  fs.renameSync(tmpFile, OUT_FILE);
}

/**
 * Fetch every enabled feed from FEEDS_FILE, merge the items, drop duplicates
 * (by guid, falling back to link), sort newest first, cap the list at
 * HARD_CAP and write the result to OUT_FILE.
 *
 * A feed that fails is logged and skipped. If every feed fails and a previous
 * news.json exists, that file is left untouched (and the exit code is set to
 * 1) rather than being replaced by an empty list.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const feeds = JSON.parse(fs.readFileSync(FEEDS_FILE, 'utf8')).filter((f) => f.enabled);
  if (feeds.length === 0) {
    console.log('no enabled feeds — writing empty news.json');
    writeNews([]);
    return;
  }

  const all = [];
  let failed = 0;
  for (const feed of feeds) {
    try {
      const xml = await fetchFeed(feed.url, TIMEOUT_MS);
      const items = extractItems(xml, feed);
      const max = feed.max || 20;
      all.push(...items.slice(0, max));
      console.log(`✓ ${feed.name}: ${items.length} items (kept ${Math.min(items.length, max)})`);
    } catch (e) {
      failed += 1;
      console.warn(`✗ ${feed.name}: ${e.message}`);
    }
  }

  if (failed === feeds.length && fs.existsSync(OUT_FILE)) {
    // Most likely a network outage: keep the last good aggregate.
    console.warn(`all ${failed} feeds failed — keeping existing ${OUT_FILE}`);
    process.exitCode = 1;
    return;
  }

  // First occurrence of a key wins; items with neither guid nor link are dropped.
  const seen = new Set();
  const deduped = [];
  for (const it of all) {
    const key = it.guid || it.link;
    if (!key || seen.has(key)) continue;
    seen.add(key);
    deduped.push(it);
  }
  // pubDate is an ISO string or null, so string order equals time order and
  // undated items sort last.
  deduped.sort((a, b) => (b.pubDate || '').localeCompare(a.pubDate || ''));

  const kept = deduped.slice(0, HARD_CAP);
  writeNews(kept);
  console.log(`→ ${OUT_FILE}: ${kept.length} items`);
}
|
||||
|
||||
main().catch((e) => {
|
||||
console.error(e);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user