Ver código fonte

Add rudimentary Urchin-style metrics and Trog::Log::Metrics

George Baugh 1 ano atrás
pai
commit
d93a05769f
5 arquivos alterados com 193 adições e 7 exclusões
  1. 7 1
      lib/TCMS.pm
  2. 0 1
      lib/Trog/Log.pm
  3. 24 3
      lib/Trog/Log/DBI.pm
  4. 77 0
      lib/Trog/Log/Metrics.pm
  5. 85 2
      schema/log.schema

+ 7 - 1
lib/TCMS.pm

@@ -123,11 +123,14 @@ sub _app {
     #TODO: Actually do something with the acceptable output formats in the renderer
     my $accept = $env->{HTTP_ACCEPT};
 
-    # These two parameters are entirely academic, as no integration with any kind of analytics is implemented.
+    # NOTE These two parameters are entirely academic, as we don't use ad tracking cookies, but the UTM parameters.
+    # UTMs are actually fully sufficient to get you what you want -- e.g. keywords, audience groups, a/b testing, etc.
+    # and you need to put up cookie consent banners if you bother using tracking cookies, which are horrific UX.
     #my $no_track = $env->{HTTP_DNT};
     #my $no_sell_info = $env->{HTTP_SEC_GPC};
 
     # Set the referer & ua to go into DB logs, but not logs in general.
+    # The referer/ua largely has no importance beyond being a proto bug report for log messages.
     $Trog::Log::DBI::referer = $env->{HTTP_REFERER};
     $Trog::Log::DBI::ua      = $env->{HTTP_UA};
 
@@ -209,6 +212,9 @@ sub _app {
     $query->{method} = $method;
     $query->{route}  = $path;
 
+    # Set the urchin parameters if necessary.
+    %$Trog::Log::DBI::urchin = map { $_ => $query->{$_} } qw{utm_source utm_medium utm_campaign utm_term utm_content};
+
     # Disallow any paths that are naughty ( starman auto-removes .. up-traversal)
     if ( index( $path, '/templates' ) == 0 || index( $path, '/statics' ) == 0 || $path =~ m/.*(\.psgi|\.pm)$/i ) {
         return _forbidden($query);

+ 0 - 1
lib/Trog/Log.pm

@@ -66,7 +66,6 @@ sub log_init {
 my $rq;
 
 sub _dbh {
-    # Too many writers = lock sadness, so just give each fork it's own DBH.
 	return Trog::SQLite::dbh( 'schema/log.schema', "logs/log.db" );
 }
 

+ 24 - 3
lib/Trog/Log/DBI.pm

@@ -8,7 +8,10 @@ use parent qw{Log::Dispatch::DBI};
 use Ref::Util qw{is_arrayref};
 use Capture::Tiny qw{capture_merged};
 
-our ($referer, $ua);
+use POSIX qw{mktime};
+use POSIX::strptime qw{strptime};
+
+our ($referer, $ua, $urchin);
 
 sub create_statement {
     my $self = shift;
@@ -19,6 +22,9 @@ sub create_statement {
     my $sql2 = "INSERT INTO messages (uuid, message) VALUES (?,?)";
     $self->{sth2} = $self->{dbh}->prepare($sql2);
 
+    my $sql3 = "INSERT INTO urchin_requests (request_uuid, utm_source, utm_medium, utm_campaign, utm_term, utm_content) VALUES (?,?,?,?,?,?)";
+    $self->{sth3} = $self->{dbh}->prepare($sql3);
+
     return $self->{dbh}->prepare($sql);
 }
 
@@ -44,13 +50,23 @@ sub log_message {
     # If this is a mangled log, forget it.
     return unless $date && $uuid;
 
-    # Allow callers to set referer.
+	# 2024-01-20T22:37:41Z
+    # Transform the date into an epoch so we can do math on it
+    my $fmt = "%Y-%m-%dT%H:%M:%SZ";
+    my @cracked = strptime($date, $fmt);
+    #XXX get a dumb warning otherwise
+    pop @cracked;
+    my $epoch = mktime(@cracked);
+
+    # Allow callers to set quasi-tracking parameters.
     # We only care about this in DB context, as it's only for metrics, which are irrelevant in text logs/debugging.
     $referer //= 'none';
     $ua      //= 'none';
+    $urchin  //= {};
 
-    my $res = $self->{sth}->execute($uuid, $date, $ip, $user, $method, $route, $referer, $ua, $code );
+    my $res = $self->{sth}->execute($uuid, $epoch, $ip, $user, $method, $route, $referer, $ua, $code );
 
+    # Dump in the accumulated messages
     if (is_arrayref($buffer{$uuid}) && @{$buffer{$uuid}}) {
         $self->{sth2}->bind_param_array(1, $uuid);
         $self->{sth2}->bind_param_array(2, $buffer{$uuid});
@@ -58,6 +74,11 @@ sub log_message {
         delete $buffer{$uuid};
     }
 
+    # Record urchin data if there is any.
+    if (%$urchin && $urchin->{utm_source}) {
+        $self->{sth3}->execute($uuid, $urchin->{utm_source}, $urchin->{utm_medium}, $urchin->{utm_campaign}, $urchin->{utm_term}, $urchin->{utm_content});
+    }
+
     return $res;
 }
 

+ 77 - 0
lib/Trog/Log/Metrics.pm

@@ -0,0 +1,77 @@
+package Trog::Log::Metrics;
+
+use strict;
+use warnings;
+
+no warnings 'experimental';
+use feature qw{signatures state};
+
+use Trog::SQLite;
+
+=head1 Trog::Log::Metrics
+
+A means for acquiring time-series representations of the data recorded by Trog::Log::DBI,
+and for reasoning about the various things that it's Urchin-compatible data can give you.
+
+=cut
+
+sub _dbh {
+	return Trog::SQLite::dbh( 'schema/log.schema', "logs/log.db" );
+}
+
+=head2 requests_per(ENUM period{second,minute,hour,day,month,year}, INTEGER num_periods, [TIME_T before], INTEGER[] @codes)
+
+Returns a data structure of the following form
+
+    {
+        labels => [TIME_STR, TIME_STR, ...],
+        data   => [INT, INT,...]
+    }
+
+Describing the # of requests for the requested $num_periods $period(s) before $before.
+
+'month' and 'year' are approximations for performance reasons; 30 day and 365 day periods.
+
+Optionally filter by response code(s).
+
+=cut
+
+sub requests_per ($period, $num_periods, $before, @codes) {
+    $before ||= time;
+
+	# Build our periods in seconds.
+	state %period2time = (
+		second => 1,
+		minute => 60,
+		hour   => 3600,
+		day    => 86400,
+		week   => 604800,
+		month  => 2592000,
+		year   => 31356000,
+	);
+
+	my $interval = $period2time{$period};
+	die "Invalid time interval passed." unless $interval;
+
+	my @input;
+	my $whereclause = '';
+	if (@codes) {
+		my $bind = join(',', (map { '?' } @codes));
+		$whereclause = "WHERE code IN ($bind)";
+		push(@input, @codes);
+	}
+	push(@input, $interval, $before, $num_periods);
+
+    my $query = "SELECT count(*) FROM all_requests $whereclause GROUP BY date / ? HAVING date < ? LIMIT ?";
+
+    my @results = map { $_->[0] } @{ _dbh()->selectall_arrayref($query, undef, @input) };
+	my $np = @results < $num_periods ? @results : $num_periods;
+	my @labels = reverse map { "$_ $period(s) ago" } (1..$np);
+
+	return {
+		labels => \@labels,
+		data   => \@results,
+	};
+}
+
+1;

+ 85 - 2
schema/log.schema

@@ -31,8 +31,9 @@ CREATE TABLE IF NOT EXISTS ua (
 );
 
 CREATE TABLE IF NOT EXISTS requests (
-    uuid TEXT PRIMARY KEY,
-    date TEXT NOT NULL,
+    id INTEGER PRIMARY KEY,
+    uuid TEXT NOT NULL UNIQUE,
+    date INTEGER NOT NULL,
     host_id INTEGER NOT NULL REFERENCES seen_hosts(id) ON DELETE CASCADE,
     user_id INTEGER NOT NULL REFERENCES seen_users(id) ON DELETE CASCADE,
     route_id INTEGER NOT NULL REFERENCES seen_routes(id) ON DELETE CASCADE,
@@ -41,8 +42,89 @@ CREATE TABLE IF NOT EXISTS requests (
     response_code_id INTEGER NOT NULL REFERENCES response_code(id) ON DELETE RESTRICT
 );
 
+/* Urchin stuff - it's powerful to be able to do things in backend based on campaign, even if you use a JS frontend. */
+CREATE TABLE IF NOT EXISTS urchin_source (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    value TEXT NOT NULL UNIQUE
+);
+CREATE TABLE IF NOT EXISTS urchin_medium (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    value TEXT NOT NULL UNIQUE
+);
+CREATE TABLE IF NOT EXISTS urchin_campaign (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    value TEXT NOT NULL UNIQUE
+);
+CREATE TABLE IF NOT EXISTS urchin_term (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    value TEXT NOT NULL UNIQUE
+);
+CREATE TABLE IF NOT EXISTS urchin_content (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    value TEXT NOT NULL UNIQUE
+);
+
+CREATE TABLE IF NOT EXISTS urchin (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    request_id INTEGER NOT NULL UNIQUE REFERENCES requests(id) ON DELETE CASCADE,
+    source_id INTEGER NOT NULL REFERENCES urchin_source(id) ON DELETE CASCADE,
+    medium_id INTEGER REFERENCES urchin_medium(id) ON DELETE CASCADE,
+    campaign_id INTEGER REFERENCES urchin_campaign(id) ON DELETE CASCADE,
+    term_id INTEGER REFERENCES urchin_term(id) ON DELETE CASCADE,
+    content_id INTEGER REFERENCES urchin_content(id) ON DELETE CASCADE
+);
+
+CREATE VIEW IF NOT EXISTS urchin_requests AS 
+    SELECT
+        u.id,
+        r.uuid   AS request_uuid,
+        us.value AS utm_source,
+        um.value AS utm_medium,
+        uc.value AS utm_campaign,
+        ut.value AS utm_term,
+        uo.value AS utm_content
+    FROM
+        urchin AS u
+    JOIN
+        requests AS r ON u.request_id = r.id
+    JOIN
+        urchin_source AS us ON us.id = u.source_id
+    LEFT JOIN
+        urchin_medium AS um ON um.id = u.medium_id
+    LEFT JOIN
+        urchin_campaign AS uc ON uc.id = u.campaign_id
+    LEFT JOIN
+        urchin_term AS ut ON ut.id = u.term_id
+    LEFT JOIN
+        urchin_content AS uo ON uo.id = u.content_id;
+
+/* Make urchin_requests a writable view via triggers.  We will always stomp the main row, as the last update will be what we want. */
+CREATE TRIGGER IF NOT EXISTS insert_urchin_requests INSTEAD OF INSERT ON urchin_requests BEGIN
+    INSERT OR IGNORE INTO urchin_source   (value) VALUES (NEW.utm_source);
+    INSERT OR IGNORE INTO urchin_medium   (value) VALUES (NEW.utm_medium);
+    INSERT OR IGNORE INTO urchin_campaign (value) VALUES (NEW.utm_campaign);
+    INSERT OR IGNORE INTO urchin_term     (value) VALUES (NEW.utm_term);
+    INSERT OR IGNORE INTO urchin_content  (value) VALUES (NEW.utm_content);
+    INSERT OR REPLACE INTO urchin SELECT
+        NEW.id,
+        r.id  AS request_id,
+        us.id AS source_id,
+        um.id AS medium_id,
+        uc.id AS campaign_id,
+        ut.id AS term_id,
+        uo.id AS content_id
+    FROM requests AS r
+    JOIN      urchin_source   AS us ON us.value = NEW.utm_source
+    LEFT JOIN urchin_medium   AS um ON um.value = NEW.utm_medium
+    LEFT JOIN urchin_campaign AS uc ON uc.value = NEW.utm_campaign
+    LEFT JOIN urchin_term     AS ut ON ut.value = NEW.utm_term
+    LEFT JOIN urchin_content  AS uo ON uo.value = NEW.utm_content
+    WHERE r.uuid = NEW.request_uuid;
+END;
+
 CREATE VIEW IF NOT EXISTS all_requests AS
     SELECT
+        q.id,
         q.uuid,
         q.date,
         h.ip_address,
@@ -76,6 +158,7 @@ CREATE TRIGGER IF NOT EXISTS insert_all_requests INSTEAD OF INSERT ON all_reques
     INSERT OR IGNORE INTO referer       (referer)      VALUES (NEW.referer);
     INSERT OR IGNORE INTO ua            (ua)           VALUES (NEW.ua);
     INSERT OR REPLACE INTO requests SELECT
+        NEW.id,
         NEW.uuid,
         NEW.date,
         h.id AS host_id,