#!/usr/bin/env node
/**
 * Тянет внешние RSS-фиды из src/data/feeds.json и записывает агрегированный
 * news.json в DATA_DIR (по умолчанию ./data). Запускается по cron на хосте.
 *
 * Использование:
 *   node scripts/pull-external-rss.mjs           # пишет в ./data/news.json
 *   DATA_DIR=/abs/path node scripts/pull-external-rss.mjs
 */
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { XMLParser } from 'fast-xml-parser';

// ES modules have no built-in __dirname, so derive the script's directory from the module URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Parent of the script's directory; base for the default input and output paths below.
const ROOT = path.resolve(__dirname, '..');
// JSON file listing the feeds to pull (main() reads `enabled`, `url`, `name` and `max` from each entry).
const FEEDS_FILE = path.join(ROOT, 'src', 'data', 'feeds.json');
// Output directory; the DATA_DIR environment variable overrides the default <ROOT>/data.
const DATA_DIR = process.env.DATA_DIR || path.join(ROOT, 'data');
// Aggregated output file written by main().
const OUT_FILE = path.join(DATA_DIR, 'news.json');
// Per-feed timeout, in milliseconds, passed to fetchFeed().
const TIMEOUT_MS = 15000;
// Maximum number of items kept in the aggregated output.
const HARD_CAP = 200;

// Shared XML parser used for every feed.
// Attributes are kept (prefixed with '@_') because Atom links carry their URL
// in the `href` attribute; the text of an element that also has attributes is
// exposed under the '#text' key.
const parser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: '@_',
  textNodeName: '#text',
  // Keep element text as strings. With the default (true) the parser coerces
  // numeric/boolean-looking text, so a title like "2024" or a guid like
  // "0012345" would arrive as a number (losing leading zeros) and fail the
  // string checks done when items are extracted.
  parseTagValue: false,
});

/**
 * Downloads a feed and returns its body as text.
 *
 * The request is aborted once `timeoutMs` milliseconds have elapsed; the timer
 * stays armed until the body has been read completely, so the limit covers the
 * whole download and not just the response headers.
 *
 * @param {string} url - Feed URL to request.
 * @param {number} timeoutMs - Time budget for the request, in milliseconds.
 * @returns {Promise<string>} The response body.
 * @throws {Error} `HTTP <status>` when the response status is not OK; whatever
 *   `fetch` rejects with on network failure or abort.
 */
async function fetchFeed(url, timeoutMs) {
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort();
  }, timeoutMs);
  const requestOptions = {
    signal: controller.signal,
    headers: { 'User-Agent': 'pushkinohistory-ru-v2 RSS aggregator' },
  };

  try {
    const response = await fetch(url, requestOptions);
    if (response.ok) {
      return await response.text();
    }
    throw new Error(`HTTP ${response.status}`);
  } finally {
    // Always disarm the timer so a finished request cannot keep the process alive.
    clearTimeout(timer);
  }
}

/**
 * Converts an HTML fragment into a short plain-text excerpt.
 *
 * Tags are replaced by spaces, whitespace runs are collapsed to a single
 * space, and the result is trimmed and cut to at most `maxLength` UTF-16 code
 * units. The cut never ends in the middle of a surrogate pair or on trailing
 * whitespace.
 *
 * @param {*} s - HTML (or plain) text; any falsy value yields ''.
 * @param {number} [maxLength=400] - Maximum length of the returned excerpt.
 * @returns {string} The plain-text excerpt.
 */
function stripHtml(s, maxLength = 400) {
  if (!s) return '';
  const text = String(s).replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  if (text.length <= maxLength) return text;

  let excerpt = text.slice(0, maxLength);
  // A trailing high surrogate means the cut split an astral character (e.g. an
  // emoji) in half; drop it rather than emit a lone surrogate.
  const lastUnit = excerpt.charCodeAt(excerpt.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    excerpt = excerpt.slice(0, -1);
  }
  // The cut may land right after a word separator.
  return excerpt.trimEnd();
}

/**
 * Returns the text content of a parsed XML node as a string.
 *
 * Handles the shapes the parser can produce for one element: a plain value,
 * an object holding the text under '#text' (element with attributes), or an
 * array (repeated element — the first occurrence is used).
 *
 * @param {*} node - Parsed node value.
 * @returns {string} The node text, or '' when there is none.
 */
function nodeText(node) {
  if (node == null) return '';
  if (Array.isArray(node)) return nodeText(node[0]);
  if (typeof node === 'object') return nodeText(node['#text']);
  return String(node);
}

/**
 * Parses a feed date into an ISO-8601 string.
 *
 * @param {*} value - Parsed date node.
 * @returns {string|null} ISO timestamp, or null when the date is missing or unparseable.
 */
function toIsoDate(value) {
  const text = nodeText(value).trim();
  if (text === '') return null;
  const date = new Date(text);
  // toISOString() throws on an invalid date; report "no date" instead so one
  // malformed entry cannot discard the whole feed.
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}

/**
 * Picks the entry URL from an Atom `link` node (single link, list of links, or plain string).
 * Prefers a link whose `rel` is "alternate" or absent (absent means alternate
 * in Atom); otherwise falls back to the first link.
 *
 * @param {*} link - Parsed `link` node of an Atom entry.
 * @returns {string} The URL, or '' when none is available.
 */
function pickAtomLink(link) {
  const candidates = (Array.isArray(link) ? link : [link]).filter((l) => l != null);
  const alternate = candidates.find(
    (l) => typeof l === 'object' && (l['@_rel'] ?? 'alternate') === 'alternate' && l['@_href'],
  );
  const chosen = alternate ?? candidates[0];
  return chosen !== null && typeof chosen === 'object'
    ? nodeText(chosen['@_href'])
    : nodeText(chosen);
}

/**
 * Parses a feed document and normalizes its entries.
 *
 * RSS 2.0 (`rss.channel.item`) is tried first, then Atom (`feed.entry`).
 * Every returned item has string `title`, `link`, `guid` and `description`
 * fields, a `pubDate` that is an ISO timestamp or null, and `source` set to
 * `feed.name`.
 *
 * @param {string} xml - Raw feed document.
 * @param {{name: string}} feed - Feed descriptor; only `name` is read here.
 * @returns {Array<object>} Normalized items; empty when neither format matches.
 */
function extractItems(xml, feed) {
  const parsed = parser.parse(xml);
  // A feed with a single entry is parsed as an object rather than an array.
  const asArray = (value) => (Array.isArray(value) ? value : [value]);

  const rssItems = parsed?.rss?.channel?.item;
  if (rssItems) {
    return asArray(rssItems).map((it) => {
      const link = nodeText(it.link);
      return {
        title: nodeText(it.title),
        link,
        guid: nodeText(it.guid) || link,
        pubDate: toIsoDate(it.pubDate),
        description: stripHtml(nodeText(it.description) || nodeText(it['content:encoded'])),
        source: feed.name,
      };
    });
  }

  const atomEntries = parsed?.feed?.entry;
  if (atomEntries) {
    return asArray(atomEntries).map((e) => {
      const link = pickAtomLink(e.link);
      return {
        title: nodeText(e.title),
        link,
        guid: nodeText(e.id) || link,
        pubDate: toIsoDate(e.updated) ?? toIsoDate(e.published),
        description: stripHtml(nodeText(e.summary) || nodeText(e.content)),
        source: feed.name,
      };
    });
  }

  return [];
}

/**
 * Writes `{ updatedAt, items }` to OUT_FILE, creating DATA_DIR if needed.
 *
 * The payload goes to a temporary file in the same directory first and is
 * then renamed over OUT_FILE, so a concurrent reader never observes a
 * half-written JSON document.
 *
 * @param {Array<object>} items - News items to store.
 * @returns {{updatedAt: string, items: Array<object>}} The payload that was written.
 */
function writeNewsFile(items) {
  const payload = { updatedAt: new Date().toISOString(), items };
  fs.mkdirSync(DATA_DIR, { recursive: true });
  const tmpFile = `${OUT_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmpFile, JSON.stringify(payload, null, 2));
    fs.renameSync(tmpFile, OUT_FILE);
  } catch (err) {
    // Do not leave a stray temp file behind when the write or rename fails.
    fs.rmSync(tmpFile, { force: true });
    throw err;
  }
  return payload;
}

/**
 * Pulls every enabled feed from FEEDS_FILE, merges the items and writes them to OUT_FILE.
 *
 * Feeds are fetched one after another; a feed that fails is logged and
 * skipped. Items are de-duplicated by guid (falling back to link), sorted
 * newest first (undated items last) and capped at HARD_CAP.
 *
 * When every feed fails and an OUT_FILE already exists, the existing file is
 * kept so a temporary outage does not wipe previously collected news.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const feeds = JSON.parse(fs.readFileSync(FEEDS_FILE, 'utf8')).filter((f) => f.enabled);
  if (feeds.length === 0) {
    console.log('no enabled feeds — writing empty news.json');
    writeNewsFile([]);
    return;
  }

  const all = [];
  let succeeded = 0;
  for (const feed of feeds) {
    try {
      const xml = await fetchFeed(feed.url, TIMEOUT_MS);
      const items = extractItems(xml, feed);
      // A missing or falsy `max` (including 0) means the default of 20 items per feed.
      const max = feed.max || 20;
      all.push(...items.slice(0, max));
      succeeded += 1;
      console.log(`OK ${feed.name}: ${items.length} (kept ${Math.min(items.length, max)})`);
    } catch (e) {
      console.warn(`FAIL ${feed.name}: ${e.message}`);
    }
  }

  if (succeeded === 0 && fs.existsSync(OUT_FILE)) {
    console.warn(`all ${feeds.length} feeds failed — keeping existing ${OUT_FILE}`);
    return;
  }

  // First occurrence wins; items with neither guid nor link cannot be identified and are dropped.
  const seen = new Set();
  const deduped = [];
  for (const it of all) {
    const key = it.guid || it.link;
    if (!key || seen.has(key)) continue;
    seen.add(key);
    deduped.push(it);
  }
  // pubDate values are ISO strings, so string order is chronological; null dates sort last.
  deduped.sort((a, b) => (b.pubDate || '').localeCompare(a.pubDate || ''));

  const out = writeNewsFile(deduped.slice(0, HARD_CAP));
  console.log(`-> ${OUT_FILE}: ${out.items.length} items`);
}

// Script entry point: run the aggregation; on any unhandled failure print the
// error and exit with status 1 so the caller (e.g. cron) can detect it.
try {
  await main();
} catch (err) {
  console.error(err);
  process.exit(1);
}