mirror of
https://github.com/GenderDysphoria/GenderDysphoria.fyi.git
synced 2025-11-25 12:32:42 +00:00
Added tweet translation mechanism (#96)
* Added tweet translation mechanism. * Removed test translation. It was done via Google Translate which I don't trust for this application.
This commit is contained in:
@@ -71,6 +71,17 @@ module.exports = exports = class Manifest {
|
||||
}
|
||||
|
||||
async get (task) {
|
||||
if (task === undefined || task === null) {
|
||||
console.error(task);
|
||||
throw new Error('Task action is undefined or null.');
|
||||
return;
|
||||
}
|
||||
if (task.input === undefined || task.input === null) {
|
||||
console.error(task);
|
||||
throw new Error('Task action is missing input. (tip: remove `twitter-cache.json` and run `gulp` again)');
|
||||
return;
|
||||
}
|
||||
|
||||
const hash = this.hash(task);
|
||||
const { input, output, cache: altCachePath } = task;
|
||||
const ext = path.extname(task.output);
|
||||
|
||||
138
build/engines.js
138
build/engines.js
@@ -1,4 +1,3 @@
|
||||
|
||||
const path = require('path');
|
||||
|
||||
const fs = require('fs-extra');
|
||||
@@ -16,6 +15,18 @@ const i18n = require('./lang');
|
||||
|
||||
const mAnchor = require('markdown-it-anchor');
|
||||
|
||||
const dateFNS = require('date-fns');
|
||||
const dateFNSLocales = require('date-fns/locale');
|
||||
// Lookup table from a two-letter language code to the date-fns locale object
// used when formatting dates for that language.
const str2locale = {
  en: dateFNSLocales.enUS,
  zh: dateFNSLocales.zhCN,
  de: dateFNSLocales.de,
  fr: dateFNSLocales.fr,
  hu: dateFNSLocales.hu,
  pl: dateFNSLocales.pl,
  es: dateFNSLocales.es,
};
|
||||
|
||||
const markdownEngines = {
|
||||
full: markdownIt({
|
||||
html: true,
|
||||
@@ -181,12 +192,14 @@ class Injectables {
|
||||
|
||||
helpers () {
|
||||
return {
|
||||
import: this.import(),
|
||||
markdown: this.markdown(),
|
||||
icon: this.icon(),
|
||||
prod: this.production(),
|
||||
rev: this.rev(),
|
||||
lang: this.lang(),
|
||||
import: this.import(),
|
||||
markdown: this.markdown(),
|
||||
icon: this.icon(),
|
||||
coalesce: this.coalesce(),
|
||||
prod: this.production(),
|
||||
rev: this.rev(),
|
||||
lang: this.lang(),
|
||||
date: this.date(),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -281,4 +294,115 @@ class Injectables {
|
||||
};
|
||||
}
|
||||
|
||||
// Given a list of arguments, returns the first that isn't undefined
|
||||
coalesce () {
|
||||
return function (...raw_args) {
|
||||
const { arguments: args } = raw_args.pop();
|
||||
for (let arg in args) {
|
||||
if (args[arg] !== undefined) {
|
||||
return args[arg];
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
};
|
||||
}
|
||||
|
||||
// Multi tool for printing dates
|
||||
//
|
||||
// {{date}} -> prints current date
|
||||
// {{date datestr}} -> prints date in datestr
|
||||
// {{date datestr datefmt}} -> prints date in datestr in format datefmt
|
||||
// {{date datestr datefmt lang}} -> prints date in datestr in format datefmt according to conventions for language lang
|
||||
//
|
||||
// Datestr can be the string "now", `undefined`, and anything parsable by `new Date()`.
|
||||
//
|
||||
// If lang is not specified, it will be extracted from the page metadata. If that is not available, English will be assumed.
|
||||
// In case of errors, the date will be returned as an ISO string if possible and its raw datestr input otherwise.
|
||||
// Datefmt format is available at https://date-fns.org/v2.25.0/docs/format
|
||||
//
|
||||
// Common formats:
|
||||
// - "h:mm aa - EEE, LLL do, yyyy" = 12 hour clock, e.g. '1:28 PM - Sat, Feb 15th, 2020' (en) or '1:28 PM - sam., 15/févr./2020' (fr)
|
||||
// - "HH:mm - EEE, LLL do, yyyy" = 24 hour clock, e.g. '13:28 - Sat, Feb 15th, 2020' (en) or '13:28 - sam., 15/févr./2020' (fr)
|
||||
// - "yyyy-MM-dd'T'HH:mm:ss.SSSXXX" or "iso" = ISO 8601 format, e.g. '2020-02-15T13:28:02.000Z'
|
||||
date () {
|
||||
return function (...args) {
|
||||
let extra = args.pop();
|
||||
let datestr, dateobj, datefmt, lang;
|
||||
|
||||
const { resolve: rval } = extra;
|
||||
const filename = rval('@value.input');
|
||||
lang = (rval('@root.this.page.lang') || 'en').split('-')[0];
|
||||
|
||||
switch (args.length) {
|
||||
case 0:
|
||||
datestr = "now";
|
||||
break;
|
||||
case 1:
|
||||
datestr = args[0];
|
||||
break;
|
||||
case 2:
|
||||
datestr = args[0];
|
||||
datefmt = args[1];
|
||||
break;
|
||||
case 3:
|
||||
datestr = args[0];
|
||||
datefmt = args[1];
|
||||
lang = args[2];
|
||||
break;
|
||||
default:
|
||||
throw new Exception('wrong number of arguments for {{date}}, got '+args.length+' maximum is 3');
|
||||
}
|
||||
|
||||
if (datestr === "now" || datestr === undefined) {
|
||||
dateobj = new Date();
|
||||
} else {
|
||||
dateobj = new Date(datestr);
|
||||
}
|
||||
|
||||
if (!dateFNS.isValid(dateobj)) {
|
||||
console.trace('Invalid input for date: ', { datestr, filename, args, extra });
|
||||
return datestr.toString();
|
||||
}
|
||||
|
||||
if (datefmt == "iso") {
|
||||
return dateobj.toISOString();
|
||||
}
|
||||
|
||||
if (lang === undefined) {
|
||||
return dateobj.toISOString();
|
||||
}
|
||||
|
||||
const locale = str2locale[lang];
|
||||
if (locale === undefined) {
|
||||
console.warn('Locale not found: '+lang);
|
||||
}
|
||||
if (datefmt === undefined || locale === undefined) {
|
||||
const options = {
|
||||
weekday: 'short',
|
||||
year: 'numeric',
|
||||
month: 'short',
|
||||
day: 'numeric',
|
||||
timeZone: 'UTC',
|
||||
timeZoneName: 'short',
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
second: '2-digit'
|
||||
};
|
||||
try {
|
||||
return dateobj.toLocaleString(lang, options);
|
||||
} catch (error) {
|
||||
console.trace('Something went horribly wrong while formating dates.', { error, filename, args, extra });
|
||||
return dateobj.toISOString();
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return dateFNS.format(dateobj, datefmt, {locale: locale});
|
||||
} catch (error) {
|
||||
console.trace('Something went horribly wrong while formating dates.', { error, filename, args, extra });
|
||||
return dateobj.toISOString();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ const { sortBy } = require('lodash');
|
||||
|
||||
const getEngines = require('./engines');
|
||||
const primeTweets = require('./page-tweets');
|
||||
const i18nTweets = require('./page-tweets').i18n;
|
||||
const pageWriter = require('./page-writer');
|
||||
const pageConcatinator = require('./page-concatinator');
|
||||
const evaluate = require('./evaluate');
|
||||
@@ -52,6 +53,9 @@ exports.everything = function (prod = false) {
|
||||
posts = sortBy(posts, 'date');
|
||||
posts.reverse();
|
||||
|
||||
// Process i18n for tweets
|
||||
await i18nTweets();
|
||||
|
||||
const assets = [ ...PostFiles.assets, ...PublicFiles.assets ];
|
||||
|
||||
const [ tasks ] = await Promise.all([
|
||||
@@ -126,6 +130,28 @@ exports.pages = function () {
|
||||
return fn;
|
||||
};
|
||||
|
||||
let twitterProcessing = false;
|
||||
|
||||
exports.twitter = function () {
|
||||
async function fn () {
|
||||
if (twitterProcessing) {
|
||||
return;
|
||||
}
|
||||
|
||||
twitterProcessing = true;
|
||||
try {
|
||||
await i18nTweets();
|
||||
} catch (exception_var) {
|
||||
twitterProcessing = false;
|
||||
throw exception_var;
|
||||
}
|
||||
twitterProcessing = false;
|
||||
}
|
||||
|
||||
fn.displayName = 'buildTwitter';
|
||||
return fn;
|
||||
};
|
||||
|
||||
exports.task = function (action, prod = false) {
|
||||
const fn = async () => {
|
||||
const tasks = await {
|
||||
|
||||
@@ -13,6 +13,9 @@ const schema = {
|
||||
protected: true,
|
||||
},
|
||||
html: true,
|
||||
html_i18n: true,
|
||||
full_text: true,
|
||||
full_text_i18n: true,
|
||||
quoted_status_id_str: true,
|
||||
entities: { media: [ {
|
||||
type: true,
|
||||
@@ -31,6 +34,12 @@ var entityProcessors = {
|
||||
hashtags (tags, tweet) {
|
||||
tags.forEach((tagObj) => {
|
||||
tweet.html = tweet.html.replace('#' + tagObj.text, `<a href="https://twitter.com/hashtag/{tagObj.text}" class="hashtag">#${tagObj.text}</a>`);
|
||||
if (tweet.html_i18n !== undefined) {
|
||||
const langs = Object.keys(tweet.html_i18n);
|
||||
for (const lang of langs) {
|
||||
tweet.html_i18n[lang] = tweet.html_i18n[lang].replace('#' + tagObj.text, `<a href="https://twitter.com/hashtag/{tagObj.text}" class="hashtag">#${tagObj.text}</a>`);
|
||||
}
|
||||
}
|
||||
});
|
||||
},
|
||||
|
||||
@@ -41,7 +50,14 @@ var entityProcessors = {
|
||||
user_mentions (users, tweet) {
|
||||
users.forEach((userObj) => {
|
||||
var regex = new RegExp('@' + userObj.screen_name, 'gi' );
|
||||
tweet.html = tweet.html.replace(regex, `<a href="https://twitter.com/${userObj.screen_name}" class="mention">@${userObj.screen_name}</a>`);
|
||||
const mention_html = `<a href="https://twitter.com/${userObj.screen_name}" class="mention">@${userObj.screen_name}</a>`;
|
||||
tweet.html = tweet.html.replace(regex, mention_html);
|
||||
if (tweet.html_i18n !== undefined) {
|
||||
const langs = Object.keys(tweet.html_i18n);
|
||||
for (const lang of langs) {
|
||||
tweet.html_i18n[lang] = tweet.html_i18n[lang].replace(regex, mention_html);
|
||||
}
|
||||
}
|
||||
});
|
||||
},
|
||||
|
||||
@@ -49,13 +65,26 @@ var entityProcessors = {
|
||||
urls.forEach(({ url, expanded_url, display_url }) => {
|
||||
const isQT = tweet.quoted_status_permalink && url === tweet.quoted_status_permalink.url;
|
||||
const className = isQT ? 'quoted-tweet' : 'url';
|
||||
tweet.html = tweet.html.replace(url, isQT ? '' : `<a href="${expanded_url}" class="${className}">${display_url}</a>`);
|
||||
const fancy_html = `<a href="${expanded_url}" class="${className}">${display_url}</a>`;
|
||||
tweet.html = tweet.html.replace(url, isQT ? '' : fancy_html);
|
||||
if (tweet.html_i18n !== undefined) {
|
||||
const langs = Object.keys(tweet.html_i18n);
|
||||
for (const lang of langs) {
|
||||
tweet.html_i18n[lang] = tweet.html_i18n[lang].replace(url, isQT ? '' : fancy_html);
|
||||
}
|
||||
}
|
||||
});
|
||||
},
|
||||
|
||||
media (media, tweet) {
|
||||
media.forEach((m) => {
|
||||
tweet.html = tweet.html.replace(m.url, '');
|
||||
if (tweet.html_i18n !== undefined) {
|
||||
const langs = Object.keys(tweet.html_i18n);
|
||||
for (const lang of langs) {
|
||||
tweet.html_i18n[lang] = tweet.html_i18n[lang].replace(m.url, '');
|
||||
}
|
||||
}
|
||||
let width, height;
|
||||
|
||||
if (has(m, 'video_info.aspect_ratio')) {
|
||||
@@ -90,6 +119,13 @@ var entityProcessors = {
|
||||
module.exports = exports = function (tweets) {
|
||||
return tweets.length ? tweets.map(parseTweet) : parseTweet(tweets);
|
||||
|
||||
// Converts raw text to HTML paragraphs: the text is split on runs of line
// breaks and every piece that is not blank is wrapped, untrimmed, in <p>…</p>.
function parseStep1 (text) {
  const paragraphs = [];
  for (const piece of text.split(/(\r\n|\n\r|\r|\n)+/)) {
    // The split also yields the captured line breaks; they trim to '' and
    // are dropped together with any other whitespace-only piece.
    if (piece.trim()) {
      paragraphs.push('<p>' + piece + '</p>');
    }
  }
  return paragraphs.join('');
}
|
||||
|
||||
function parseTweet (tweet) {
|
||||
// clone the tweet so we're not altering the original
|
||||
tweet = JSON.parse(JSON.stringify(tweet));
|
||||
@@ -105,11 +141,29 @@ module.exports = exports = function (tweets) {
|
||||
];
|
||||
|
||||
// Copying text value to a new property html. The final output will be set to this property
|
||||
tweet.html = (tweet.full_text || tweet.text)
|
||||
.split(/(\r\n|\n\r|\r|\n)+/)
|
||||
.map((s) => s.trim() && '<p>' + s + '</p>')
|
||||
.filter(Boolean)
|
||||
.join('');
|
||||
if (tweet.full_text !== undefined || tweet.text !== undefined) {
|
||||
tweet.html = parseStep1(tweet.full_text || tweet.text);
|
||||
}
|
||||
if (tweet.html_i18n === undefined) {
|
||||
tweet.html_i18n = {};
|
||||
}
|
||||
if (tweet.full_text_i18n === undefined) {
|
||||
tweet.full_text_i18n = {};
|
||||
}
|
||||
|
||||
// Find which languages we actually have translations for
|
||||
const possible_langs = Object.keys(tweet.full_text_i18n);
|
||||
const langs = [];
|
||||
for (const lang of possible_langs) {
|
||||
const trimed = tweet.full_text_i18n[lang].trim();
|
||||
if (trimed.length > 0) {
|
||||
langs.push(lang);
|
||||
}
|
||||
}
|
||||
|
||||
for (const lang of langs) {
|
||||
tweet.html_i18n[lang] = parseStep1(tweet.full_text_i18n[lang]);
|
||||
}
|
||||
|
||||
if (tweet.quoted_status) {
|
||||
tweet.quoted_status = parseTweet(tweet.quoted_status);
|
||||
@@ -160,8 +214,15 @@ module.exports = exports = function (tweets) {
|
||||
}
|
||||
|
||||
// Process Emoji's
|
||||
tweet.html = twemoji.parse(tweet.html);
|
||||
tweet.user.name_html = twemoji.parse(tweet.user.name);
|
||||
if (tweet.html) {
|
||||
tweet.html = twemoji.parse(tweet.html);
|
||||
}
|
||||
for (const lang of langs) {
|
||||
tweet.html_i18n[lang] = twemoji.parse(tweet.html_i18n[lang]);
|
||||
}
|
||||
if (tweet.user !== undefined && tweet.user.name !== undefined) {
|
||||
tweet.user.name_html = twemoji.parse(tweet.user.name);
|
||||
}
|
||||
|
||||
return deepPick(tweet, schema);
|
||||
}
|
||||
|
||||
@@ -5,7 +5,41 @@ const log = require('fancy-log');
|
||||
const tweetparse = require('./lib/tweetparse');
|
||||
const Twitter = require('twitter-lite');
|
||||
const { hasOwn } = require('./lib/util');
|
||||
var twemoji = require('twemoji' );
|
||||
|
||||
// Renders raw tweet text as HTML: each non-blank line becomes a <p> element
// and the joined markup is then passed through twemoji.parse.
function tweetText2Html(tweet_text) {
  const pieces = tweet_text.split(/(\r\n|\n\r|\r|\n)+/);
  const markup = pieces.reduce((html, piece) => {
    // Line-break captures and whitespace-only pieces trim to '' and are skipped.
    return piece.trim() ? html + '<p>' + piece + '</p>' : html;
  }, '');
  return twemoji.parse(markup);
}
|
||||
|
||||
/**
 * Builds a copy of a tweet with its translations attached.
 *
 * @param {object} original_tweet - tweet record; `id_str`, `lang` and
 *   `full_text` are read. It is not modified.
 * @param {object} twitter_i18n - translations keyed by tweet id; an entry's
 *   `full_text_i18n` maps a language code to translated text. It is not
 *   modified.
 * @returns {object} shallow copy of the tweet. Without a translation entry
 *   the copy has no `html_i18n` / `full_text_i18n`; with one,
 *   `full_text_i18n` holds the translations plus the original text under the
 *   tweet's own language (or "x-original" when the tweet has no `lang`).
 */
function applyI18N(original_tweet, twitter_i18n) {
  const id = original_tweet.id_str;
  // Shallow copy so the caller's tweet keeps its own top-level fields.
  const tweet = Object.assign({}, original_tweet);

  // Do we have a translation for this tweet?
  const translation = twitter_i18n[id];
  if (translation === undefined) {
    // If not, delete any translation fields just in case
    delete tweet.html_i18n;
    delete tweet.full_text_i18n;
    return tweet;
  }

  const originalLang = tweet.lang || 'x-original';

  // Copy the map before adding to it: the original text written below must
  // not leak back into the caller's translation table. A missing
  // `full_text_i18n` is treated as an empty map.
  tweet.full_text_i18n = Object.assign({}, translation.full_text_i18n);

  if (originalLang in tweet.full_text_i18n && tweet.full_text_i18n[originalLang] !== tweet.full_text) {
    // The translation table carries its own, different version of the
    // original text; report it and keep the table's version.
    log.warn("Original text not matching for tweet "+id, { expected: tweet.full_text, got: tweet.full_text_i18n[originalLang]});
  } else {
    tweet.full_text_i18n[originalLang] = tweet.full_text;
  }

  // Return the tweet with the translations
  return tweet;
}
|
||||
|
||||
module.exports = exports = async function tweets (pages) {
|
||||
const [ twitter, twitterBackup, twitterCache ] = await Promise.all([
|
||||
@@ -40,7 +74,6 @@ module.exports = exports = async function tweets (pages) {
|
||||
if (tweet.quoted_status_id_str && !twitterCache[tweet.quoted_status_id_str]) {
|
||||
tweetsNeeded.push(tweet.quoted_status_id_str);
|
||||
}
|
||||
// if (!twitterBackup[tweet.id_str]) twitterBackup[tweet.id_str] = tweet;
|
||||
twitterBackup[tweet.id_str] = tweet;
|
||||
twitterCache[tweet.id_str] = tweetparse(tweet);
|
||||
loaded.push(tweet.id_str);
|
||||
@@ -145,3 +178,26 @@ exports.attachTweets = function (tweetids, tweets) {
|
||||
return dict;
|
||||
}, {});
|
||||
};
|
||||
|
||||
exports.i18n = async function() {
|
||||
const [ twitterBackup, twitterCache, twitterI18N ] = await Promise.all([
|
||||
fs.readJson(resolve('twitter-backup.json')),
|
||||
fs.readJson(resolve('twitter-cache.json')).catch(() => ({})),
|
||||
fs.readJson(resolve('twitter-i18n.json')),
|
||||
]);
|
||||
|
||||
const twitterCacheBkp = JSON.stringify(twitterCache, null, 2);
|
||||
|
||||
// Make sure no translation is forgotten
|
||||
for (const id in twitterI18N) {
|
||||
if (id in twitterBackup) {
|
||||
twitterCache[id] = applyI18N(twitterBackup[id], twitterI18N);
|
||||
twitterCache[id] = tweetparse(twitterCache[id]);
|
||||
}
|
||||
}
|
||||
|
||||
const twitterCacheJson = JSON.stringify(twitterCache, null, 2);
|
||||
if (twitterCacheBkp != twitterCacheJson) {
|
||||
await fs.writeFile(resolve('twitter-cache.json'), twitterCacheJson);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,6 +107,7 @@ module.exports = exports = class Page extends File {
|
||||
const { titlecard, webready } = this.files = PublicFiles.for(this.dir);
|
||||
this.ignore = this.meta.ignore;
|
||||
this.draft = this.meta.draft;
|
||||
this.lang = this.lang || this.meta.lang || "en";
|
||||
this.siblings = this.meta.siblings;
|
||||
this.images = webready;
|
||||
this.titlecard = titlecard;
|
||||
|
||||
Reference in New Issue
Block a user