init: Vite+React+Tailwind v2 site with HTML content from WP, RSS feed, external feed aggregator, prerender

This commit is contained in:
striker
2026-05-21 01:11:26 +03:00
commit 76cdeb8b48
42 changed files with 6317 additions and 0 deletions

97
scripts/convert_posts.py Normal file
View File

@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""Convert WP posts-raw.jsonl → src/content/{posts,pages}.json with image URL rewrite + translit slugs."""
import json
import re
import sys
import unicodedata
from pathlib import Path
from urllib.parse import unquote
# Project root: two directory levels above this file's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# Raw export consumed by main(), one JSON object per line.
RAW = ROOT.joinpath("scripts", "posts-raw.jsonl")
# Destination directory for the generated posts.json / pages.json.
OUT_DIR = ROOT.joinpath("src", "content")
# Created at import time so the writes in main() never hit a missing directory.
OUT_DIR.mkdir(exist_ok=True, parents=True)
# Lowercase Cyrillic -> Latin transliteration table used to build URL slugs.
# The hard sign and the soft sign map to '' so they disappear from the slug.
TRANSLIT = {
    'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo',
    'ж': 'zh', 'з': 'z', 'и': 'i', 'й': 'y', 'к': 'k', 'л': 'l', 'м': 'm',
    'н': 'n', 'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u',
    'ф': 'f', 'х': 'h', 'ц': 'ts', 'ч': 'ch', 'ш': 'sh', 'щ': 'sch',
    'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'yu', 'я': 'ya',
}


def slugify_ru(s: str) -> str:
    """Return a transliterated, hyphen-separated slug for *s*.

    The input is percent-decoded, NFC-normalized and lowercased.  Characters
    found in ``TRANSLIT`` are replaced by their Latin spelling; other
    alphanumeric characters, ``-`` and ``_`` are kept as they are; whitespace
    becomes ``-``; everything else is dropped.  Runs of hyphens collapse to a
    single one and leading/trailing hyphens are removed.

    Args:
        s: Raw slug or title, optionally percent-encoded.

    Returns:
        The slug, or ``'untitled'`` when nothing usable is left.
    """
    # NFC first: in decomposed form 'й' and 'ё' arrive as base letter plus a
    # combining mark, which would miss the table ('й' -> 'i' instead of 'y',
    # 'ё' -> 'e' instead of 'yo') once the combining mark is discarded below.
    text = unicodedata.normalize('NFC', unquote(s)).lower()
    out = []
    for ch in text:
        if ch in TRANSLIT:
            out.append(TRANSLIT[ch])
        elif ch.isalnum() or ch in '-_':
            out.append(ch)
        elif ch.isspace():
            # Any whitespace (not only space/tab) separates words; dropping a
            # newline or a no-break space would glue neighbouring words.
            out.append('-')
        # Remaining characters (punctuation, symbols, marks) are dropped.
    res = re.sub(r'-+', '-', ''.join(out)).strip('-')
    return res or 'untitled'
# Absolute http(s) URLs (with or without "www.") under
# pushkinohistory.ru/wp-content/uploads/<dir>/<dir>/<rest>; group 1 is <rest>.
# The two directory segments use the same terminators as <rest> (quotes,
# whitespace, ')') so a match can never run past the end of the URL into
# following markup that happens to contain slashes.
# NOTE(review): the two dropped segments are presumably WordPress year/month
# folders, so equally named files from different months map to the same
# /uploads/ path — confirm that is acceptable.
UPLOAD_RE = re.compile(
    r'https?://(?:www\.)?pushkinohistory\.ru/wp-content/uploads/'
    r'[^/"\'\s)]+/[^/"\'\s)]+/'
    r'([^"\'\s)]+)'
)


def rewrite_uploads(html: str) -> str:
    """Rewrite absolute upload URLs in *html* to site-local ``/uploads/`` paths.

    Every URL matched by ``UPLOAD_RE`` is replaced by ``/uploads/`` followed
    by the part of the path after its first two directory segments.  Text
    that does not match is returned unchanged.

    Args:
        html: HTML fragment that may contain upload URLs.

    Returns:
        The fragment with matching URLs rewritten.
    """
    return UPLOAD_RE.sub(r'/uploads/\1', html)
# Category slugs for each exported item, keyed by the item's numeric id.
# Ids missing from this table are treated as having no categories by main().
CATEGORIES = {
    # pages — no category
    20: [],
    23: [],
    73: [],
    94: [],
    # posts
    137: ['main'],
    139: ['main'],
    142: ['main'],
    145: ['main', 'today', 'tech'],
    158: ['main', 'tech'],
    226: ['main'],
    235: ['main'],
}

# Display name emitted in the 'categories' field for each category slug.
CATEGORY_NAMES = {
    'main': 'Главная',
    'today': 'Настоящее',
    'tech': 'Техническое',
}
def _build_item(row: dict) -> dict:
    """Convert one raw export row into the record written to the content JSON.

    Reads the ``id``, ``name``, ``title``, ``date`` and ``content`` keys of
    *row* (a missing one raises ``KeyError``) plus the optional ``excerpt``.
    """
    old_slug = row['name']
    # One lookup keeps 'categories' and 'categorySlugs' in step; the copy
    # stops the emitted record from aliasing the module-level table.
    category_slugs = list(CATEGORIES.get(row['id'], []))
    return {
        'id': row['id'],
        'slug': slugify_ru(old_slug),
        'oldSlug': old_slug,
        'title': row['title'],
        'date': row['date'],
        'excerpt': row.get('excerpt') or '',
        'html': rewrite_uploads(row['content']),
        'categories': [CATEGORY_NAMES[c] for c in category_slugs],
        'categorySlugs': category_slugs,
    }


def main() -> None:
    """Convert the raw JSONL export into ``posts.json`` and ``pages.json``.

    Reads ``RAW`` line by line (blank lines are skipped), turns every row
    into a content record and files it under posts when its ``type`` is
    ``'post'`` and under pages otherwise.  Posts are written in descending
    ``date`` order, pages in ascending ``id`` order, both into ``OUT_DIR``.
    A summary of the generated slugs is printed to stdout; slug collisions
    are reported on stderr.

    Raises:
        json.JSONDecodeError: A non-blank line is not valid JSON; the message
            names the offending line of the input file.
        KeyError: A row lacks a required key, or a category slug has no
            entry in ``CATEGORY_NAMES``.
    """
    posts: list[dict] = []
    pages: list[dict] = []
    # slug -> id of the first item that produced it.
    seen_slugs: dict = {}
    with RAW.open(encoding='utf-8') as f:
        for lineno, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                row = json.loads(line)
            except json.JSONDecodeError as err:
                # Same exception type, but say which JSONL line is broken.
                raise json.JSONDecodeError(
                    f'{RAW.name} line {lineno}: {err.msg}', err.doc, err.pos
                ) from err
            item = _build_item(row)
            slug = item['slug']
            if slug in seen_slugs:
                # Transliteration is lossy and empty results fall back to
                # 'untitled', so distinct items can end up on the same slug.
                print(
                    f"warning: slug '{slug}' of id {item['id']} "
                    f"is already used by id {seen_slugs[slug]}",
                    file=sys.stderr,
                )
            else:
                seen_slugs[slug] = item['id']
            if row['type'] == 'post':
                posts.append(item)
            else:
                pages.append(item)
    posts.sort(key=lambda p: p['date'], reverse=True)
    pages.sort(key=lambda p: p['id'])
    (OUT_DIR / 'posts.json').write_text(
        json.dumps(posts, ensure_ascii=False, indent=2), encoding='utf-8'
    )
    (OUT_DIR / 'pages.json').write_text(
        json.dumps(pages, ensure_ascii=False, indent=2), encoding='utf-8'
    )
    print(f'posts: {len(posts)} → src/content/posts.json')
    print(f'pages: {len(pages)} → src/content/pages.json')
    for p in posts:
        print(f" post: /{p['slug']}/ (was: {p['oldSlug'][:40]}{'...' if len(p['oldSlug']) > 40 else ''}) — {p['title']}")
    for p in pages:
        print(f" page: /{p['slug']}/ — {p['title']}")


if __name__ == '__main__':
    main()