Some checks failed
deploy / deploy (push) Failing after 12s
- Бэкап старой версии на ветке vite-react-backup - Stack: Astro 5 + nginx:alpine runtime, образ ~50 МБ (был ~600 МБ) - @astrojs/rss заменён ручным buildRss() — гарантия CDATA в content:encoded для IPB Importer - @astrojs/sitemap → sitemap-index.xml + sitemap.txt - 152-ФЗ cookie consent + privacy.astro + Analytics с gating - AI-файлы: robots.txt с явным allow для AI-краулеров, ai.txt, llms.txt - Гибридный визуал: фото-фон шапки (аэрофото Пушкино) + PT Serif + IBM Plex Sans - Иерархия: hero "Главная история" с рамкой + "Ещё из истории" + "Хроника" - Категория "main" (псевдо) скрыта из плашек и из Рубрик в сайдбаре - hideFromList: true для технических постов - featuredImage в frontmatter для постов без хорошей первой <img> - WP resized-URL (-WxH.ext) автоматически → оригинал - CI/CD: .gitea/workflows/deploy.yml (push → SSH-build) - Внешние RSS: scripts/pull-external-rss.mjs пишет news.json в bind-mount, фронт фетчит client-side
125 lines
4.2 KiB
JavaScript
125 lines
4.2 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
|
|
* Тянет внешние RSS-фиды из src/data/feeds.json и записывает агрегированный
|
|
* news.json в DATA_DIR (по умолчанию ./data). Запускается по cron на хосте.
|
|
*
|
|
* Использование:
|
|
* node scripts/pull-external-rss.mjs # пишет в ./data/news.json
|
|
* DATA_DIR=/abs/path node scripts/pull-external-rss.mjs
|
|
*/
|
|
import fs from 'node:fs';
|
|
import path from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import { XMLParser } from 'fast-xml-parser';
|
|
|
|
// Paths are resolved from the parent directory of this script's directory,
// so the script behaves the same regardless of the current working directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const ROOT = path.resolve(__dirname, '..');

// List of feed descriptors read by main().
const FEEDS_FILE = path.join(ROOT, 'src', 'data', 'feeds.json');

// Output directory; can be overridden with the DATA_DIR environment variable.
const DATA_DIR = process.env.DATA_DIR || path.join(ROOT, 'data');
const OUT_FILE = path.join(DATA_DIR, 'news.json');

// Timeout (milliseconds) handed to fetchFeed() for every feed request.
const TIMEOUT_MS = 15000;

// Upper bound on the number of items written to news.json.
const HARD_CAP = 200;

const parser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: '@_',
  textNodeName: '#text',
  // Keep element text as strings. With the library default (true) a title
  // such as "2024" or a guid such as "00123" is converted to a number, which
  // the string checks in extractItems() then discard.
  parseTagValue: false,
});
|
|
|
|
/**
 * Downloads a feed and resolves with its body as text.
 *
 * The request is aborted once `timeoutMs` milliseconds have elapsed; the
 * timer stays armed until the body has been read and is always cleared
 * afterwards.
 *
 * @param {string} url - Feed URL.
 * @param {number} timeoutMs - Abort deadline in milliseconds.
 * @returns {Promise<string>} Response body.
 * @throws {Error} `HTTP <status>` when the response status is not OK;
 *   whatever `fetch` rejects with on network failure or abort.
 */
async function fetchFeed(url, timeoutMs) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const requestInit = {
    signal: controller.signal,
    headers: { 'User-Agent': 'pushkinohistory-ru-v2 RSS aggregator' },
  };

  try {
    const response = await fetch(url, requestInit);
    if (response.ok) {
      return await response.text();
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    clearTimeout(timer);
  }
}
|
|
|
|
/**
 * Turns an HTML fragment into a short plain-text excerpt: tags are replaced
 * by spaces, whitespace runs are collapsed, and the result is truncated.
 *
 * @param {*} s - HTML string. A parsed XML node of the form
 *   `{ '#text': ... }` is also accepted, in which case its text is used.
 * @param {number} [maxLen=400] - Maximum length in UTF-16 code units.
 * @returns {string} Plain text, `''` for falsy input.
 */
function stripHtml(s, maxLen = 400) {
  if (!s) return '';

  // An element that carries attributes is parsed into an object; stringifying
  // it directly would yield "[object Object]".
  const raw = typeof s === 'object' && !Array.isArray(s) ? s['#text'] ?? '' : s;

  const text = String(raw).replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  if (text.length <= maxLen) return text;

  // Do not cut a surrogate pair in half: if the last kept code unit is a high
  // surrogate, drop it so the excerpt never ends with a lone surrogate.
  const lastUnit = text.charCodeAt(maxLen - 1);
  const end = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? maxLen - 1 : maxLen;
  return text.slice(0, end);
}
|
|
|
|
/**
 * Returns the text of a parsed XML value as a string.
 * Handles plain values, `{ '#text': ... }` nodes (elements with attributes)
 * and arrays (repeated elements — the first one is used).
 */
function nodeText(node) {
  if (node == null) return '';
  if (Array.isArray(node)) return nodeText(node[0]);
  if (typeof node === 'object') return nodeText(node['#text']);
  return String(node);
}

/**
 * Converts a feed date value to an ISO-8601 string, or `null` when the value
 * is missing or cannot be parsed (instead of throwing RangeError).
 */
function toIsoDate(value) {
  const raw = value !== null && typeof value === 'object' ? nodeText(value) : value;
  if (!raw) return null;
  const date = new Date(raw);
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}

/**
 * Picks the article URL from an Atom `link` value (single node or array).
 * A link whose `rel` is "alternate" or absent is preferred; otherwise the
 * first link that has any URL is used.
 */
function atomLink(link) {
  if (link == null) return '';
  const candidates = (Array.isArray(link) ? link : [link]).filter((l) => l != null);
  const hrefOf = (l) => (typeof l === 'object' ? nodeText(l['@_href']) : String(l));

  const alternate = candidates.find(
    (l) => typeof l === 'object' && (l['@_rel'] ?? 'alternate') === 'alternate' && hrefOf(l),
  );
  if (alternate) return hrefOf(alternate);

  for (const candidate of candidates) {
    const href = hrefOf(candidate);
    if (href) return href;
  }
  return '';
}

/**
 * Parses feed XML and normalises its entries.
 *
 * RSS 2.0 (`rss.channel.item`) is tried first, then Atom (`feed.entry`).
 * Any other document shape yields an empty array.
 *
 * @param {string} xml - Raw feed document.
 * @param {{ name: string }} feed - Feed descriptor; `name` becomes `source`.
 * @returns {Array<{title: string, link: string, guid: string,
 *   pubDate: (string|null), description: string, source: string}>}
 */
function extractItems(xml, feed) {
  const parsed = parser.parse(xml);

  const rssItems = parsed?.rss?.channel?.item;
  if (rssItems) {
    const arr = Array.isArray(rssItems) ? rssItems : [rssItems];
    return arr.map((it) => {
      const link = nodeText(it.link);
      return {
        title: nodeText(it.title),
        link,
        guid: nodeText(it.guid) || link,
        // A malformed date must not discard the whole feed.
        pubDate: toIsoDate(it.pubDate),
        description: stripHtml(nodeText(it.description) || nodeText(it['content:encoded'])),
        source: feed.name,
      };
    });
  }

  const atomEntries = parsed?.feed?.entry;
  if (atomEntries) {
    const arr = Array.isArray(atomEntries) ? atomEntries : [atomEntries];
    return arr.map((e) => {
      const link = atomLink(e.link);
      return {
        title: nodeText(e.title),
        link,
        guid: nodeText(e.id) || link,
        pubDate: toIsoDate(e.updated) ?? toIsoDate(e.published),
        description: stripHtml(nodeText(e.summary) || nodeText(e.content)),
        source: feed.name,
      };
    });
  }

  return [];
}
|
|
|
|
/**
 * Writes `{ updatedAt, items }` to OUT_FILE and returns the written object.
 *
 * The JSON is written to a temporary file in the same directory and then
 * renamed over OUT_FILE, so a concurrent reader never sees a truncated file.
 * NOTE(review): this assumes DATA_DIR is mounted as a directory; if news.json
 * itself is bind-mounted as a single file, confirm that rename is acceptable.
 */
function writeNewsFile(items) {
  const payload = { updatedAt: new Date().toISOString(), items };
  const tmpFile = `${OUT_FILE}.${process.pid}.tmp`;

  fs.mkdirSync(DATA_DIR, { recursive: true });
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(payload, null, 2));
    fs.renameSync(tmpFile, OUT_FILE);
  } catch (err) {
    // Do not leave a stray temp file behind when the write or rename fails.
    fs.rmSync(tmpFile, { force: true });
    throw err;
  }
  return payload;
}

/**
 * Fetches every enabled feed from FEEDS_FILE, merges the items, removes
 * duplicates (by guid, falling back to link), sorts newest first and writes
 * at most HARD_CAP items to OUT_FILE.
 *
 * A failing feed is logged and skipped. If feeds are enabled but none of them
 * could be fetched, the existing news.json is left untouched and an error is
 * thrown, so a network outage does not wipe previously collected news.
 *
 * @returns {Promise<void>}
 * @throws {Error} When FEEDS_FILE cannot be read/parsed, when all enabled
 *   feeds fail, or when the output cannot be written.
 */
async function main() {
  const feeds = JSON.parse(fs.readFileSync(FEEDS_FILE, 'utf8')).filter((f) => f.enabled);
  if (feeds.length === 0) {
    console.log('no enabled feeds — writing empty news.json');
    writeNewsFile([]);
    return;
  }

  const all = [];
  let succeeded = 0;
  for (const feed of feeds) {
    try {
      const xml = await fetchFeed(feed.url, TIMEOUT_MS);
      const items = extractItems(xml, feed);
      // Only a positive integer is a usable per-feed limit; anything else
      // (missing, 0, negative, non-numeric) falls back to 20.
      const max = Number.isInteger(feed.max) && feed.max > 0 ? feed.max : 20;
      const kept = items.slice(0, max);
      all.push(...kept);
      succeeded += 1;
      console.log(`OK ${feed.name}: ${items.length} (kept ${kept.length})`);
    } catch (e) {
      console.warn(`FAIL ${feed.name}: ${e.message}`);
    }
  }

  if (succeeded === 0) {
    throw new Error('all feeds failed — keeping existing news.json');
  }

  // First occurrence wins; items without both guid and link are dropped.
  const seen = new Set();
  const deduped = [];
  for (const it of all) {
    const key = it.guid || it.link;
    if (!key || seen.has(key)) continue;
    seen.add(key);
    deduped.push(it);
  }
  // ISO timestamps sort chronologically as strings; undated items go last.
  deduped.sort((a, b) => (b.pubDate || '').localeCompare(a.pubDate || ''));

  const out = writeNewsFile(deduped.slice(0, HARD_CAP));
  console.log(`-> ${OUT_FILE}: ${out.items.length} items`);
}
|
|
|
|
// Script entry point: run the aggregation; on any failure print the error
// and exit with status 1.
try {
  await main();
} catch (err) {
  console.error(err);
  process.exit(1);
}
|