forked from github.com/GenderDysphoria.fyi
More analytics work
This commit is contained in:
@@ -2,9 +2,20 @@ const { gunzip } = require('zlib');
|
||||
const { promisify } = require('util');
|
||||
const { S3 } = require('aws-sdk');
|
||||
const { unescape } = require('querystring');
|
||||
const parseUA = require('ua-parser-js');
|
||||
const format = require('date-fns/format');
|
||||
const { URL } = require('url');
|
||||
|
||||
const gunzipAsync = promisify(gunzip);
|
||||
|
||||
/**
 * Safely break a URL string into a plain object of its components.
 *
 * Wraps the WHATWG `URL` constructor so that unparseable input produces
 * `null` instead of an exception, letting callers write
 * `const { pathname } = url(value) || {};`.
 *
 * @param {string} input - The URL to parse (no base is supplied, so it must be absolute).
 * @returns {?Object} An object holding `hash`, `host`, `hostname`, `href`,
 *   `origin`, `password`, `pathname`, `port`, `protocol`, `search`,
 *   `searchParams` and `username`, or `null` when `input` cannot be parsed.
 */
function url (input) {
  let parsed;
  try {
    parsed = new URL(input);
  } catch (e) {
    // Unparseable input is reported to the caller as null rather than thrown.
    return null;
  }

  // Copy the accessor values onto a plain object so the result can be
  // destructured, spread and logged like ordinary data.
  const {
    hash,
    host,
    hostname,
    href,
    origin,
    password,
    pathname,
    port,
    protocol,
    search,
    searchParams,
    username,
  } = parsed;

  return {
    hash,
    host,
    hostname,
    href,
    origin,
    password,
    pathname,
    port,
    protocol,
    search,
    searchParams,
    username,
  };
}
|
||||
|
||||
// Parsing the line containing the version.
|
||||
//
|
||||
@@ -31,7 +42,7 @@ const parseFields = (line) => {
|
||||
if (!line.startsWith('#Fields:')) {
|
||||
throw new Error(`Invalid fields line '${line}'`);
|
||||
} else {
|
||||
return line.match(/[\w()-]+(\s|$)/g).map(field => (
|
||||
return line.match(/[\w()-]+(\s|$)/g).map((field) => (
|
||||
// Strip parentheses and remove unnecessary abbreviations in field names
|
||||
field.replace(/\(([^)]+)\)/, '-$1').replace(/^(c-|cs-|sc-)/, '').trim().toLowerCase()
|
||||
));
|
||||
@@ -41,7 +52,7 @@ const parseFields = (line) => {
|
||||
// Unescape value twice (because fuck you that's why).
|
||||
// https://forums.aws.amazon.com/thread.jspa?threadID=134017
|
||||
//
|
||||
const decode = value => unescape(unescape(value));
|
||||
const decode = (value) => unescape(unescape(value));
|
||||
|
||||
// Split up line and assign to corresponding field.
|
||||
//
|
||||
/**
 * Parse one tab-separated log line into an analytics record.
 *
 * Only tracking-pixel hits are kept: the request method must not be OPTIONS,
 * the `uri-stem` field must be `/i`, and a referer must be present. Any other
 * line yields `undefined` so the caller can filter it out.
 *
 * @param {string} line - A single line from the log file.
 * @param {string[]} fields - Field names, matched by position to the line's tab-separated sections.
 * @returns {Object|undefined} The analytics record, or `undefined` when the line is not a pixel hit.
 * @throws {Error} If the line starts with `#` (rejected as an invalid log line).
 */
const parseLine = (line, fields) => {
  if (line.startsWith('#')) {
    throw new Error(`Invalid log line '${line}'`);
  }

  const raw = line.split('\t').reduce((object, section, index) => {
    const result = object;
    if (section !== '-') result[fields[index]] = decode(section); // Skip missing fields
    return result;
  }, {});

  // filter out OPTIONS calls
  if (raw.method === 'OPTIONS') return undefined;

  // I only care about the pixel hits, nothing else.
  if (raw['uri-stem'] !== '/i') return undefined;

  // this isn't an analytics event
  if (!raw.referer) return undefined;

  // Swap dashes for underscores in the field names (uri-query -> uri_query).
  const row = Object.fromEntries(Object.entries(raw).map(([ k, v ]) => [ k.replace(/-/g, '_'), v ]));

  const query = row.uri_query
    ? Object.fromEntries(new URLSearchParams(row.uri_query))
    : {};

  const useragent = parseUA(row.user_agent);

  // Format an epoch-milliseconds value. date-fns `format` throws on an
  // Invalid Date, so a hit with a missing or malformed timestamp gets null
  // here instead of aborting the parse of the entire log file.
  const timestamp = (ms) => {
    const date = new Date(ms);
    return Number.isNaN(date.getTime()) ? null : format(date, 'yyyy-MM-dd HH:mm:ss');
  };

  const sessionStart = Number(query.start);
  // The client sends the literal string 'null' when there is no end time.
  const sessionEnd = query.end === 'null' ? 0 : Number(query.end);
  const duration = sessionEnd > sessionStart ? Math.floor((sessionEnd - sessionStart) / 1000) : null;

  const { language, viewed } = query;
  const max_scroll = parseInt(query.max_scroll, 10) || 0;
  const page_height = parseInt(query.page_height, 10) || 0;
  const viewport_height = parseInt(query.viewport_height, 10) || 0;

  // The page that fired the pixel comes from the request's referer header;
  // the page's own referrer is passed along in the query string.
  const { pathname } = url(row.referer) || {};
  const { hostname: referrer_host, href: referrer } = url(query.referrer) || {};

  const { browser, os, device } = useragent;

  return {
    dts: `${row.date} ${row.time}`,
    ip: row.ip,
    tid: query.tid !== 'false' ? query.tid : null,
    url: pathname,
    referrer,
    referrer_host,
    client_start: timestamp(sessionStart),
    // Fixed: this previously formatted sessionStart, so client_end always
    // duplicated client_start.
    client_end: sessionEnd ? timestamp(sessionEnd) : null,
    duration,
    language,
    viewed,
    max_scroll,
    page_height,
    viewport_height,
    browser: browser.name,
    browser_version: browser.major,
    os: `${os.name} ${os.version}`,
    device_type: (device && device.type) || null,
    device: (device && device.vendor && `${device.vendor} ${device.model}`) || null,
    useragent,
    query,
    original: row,
  };
};
|
||||
|
||||
@@ -79,5 +155,9 @@ exports.parseLogFile = async ({ bucket, key, region }) => {
|
||||
// Shift next line containing fields format and parse it for validation
|
||||
const fields = parseFields(lines.shift());
|
||||
|
||||
return lines.map(line => parseLine(line, fields));
|
||||
console.log(`Found ${lines.length} rows to parse`); // eslint-disable-line no-console
|
||||
const rows = lines.map((line) => parseLine(line, fields)).filter(Boolean);
|
||||
console.log(`Produced ${rows.length} results`);
|
||||
console.log('Sample', rows[0]);
|
||||
return rows;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user