Fix #113: Index flat-file posts with sqlite

George S. Baugh, 5 years ago
commit ce50645529
8 changed files with 153 additions and 4 deletions
  1. .gitignore (+1 -0)
  2. bin/build_index.pl (+16 -0)
  3. migrate.pl → bin/migrate.pl (+0 -0)
  4. lib/Trog/Data/FlatFile.pm (+13 -2)
  5. lib/Trog/DataModule.pm (+1 -2)
  6. lib/Trog/SQLite.pm (+44 -0)
  7. lib/Trog/SQLite/TagIndex.pm (+65 -0)
  8. schema/flatfile.schema (+13 -0)

+ 1 - 0
.gitignore

@@ -4,6 +4,7 @@ favicon.ico
 dist/
 www/assets/*.*
 data/DUMMY.json
+data/posts.db
 data/files/*
 pod2htmd.tmp
 fgEmojiPicker.js

+ 16 - 0
bin/build_index.pl

@@ -0,0 +1,16 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use lib 'lib';
+use Trog::Data;
+use Trog::Config;
+use Trog::SQLite::TagIndex;
+
+# Use this to (re)build the post index after importing data; normal writes keep the index current, so it's not otherwise needed.
+
+my $conf = Trog::Config::get();
+my $search = Trog::Data->new($conf);
+
+Trog::SQLite::TagIndex::build_index($search);
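
For context, this is a one-shot tool: run it from the checkout root once after importing data, and ordinary writes keep the index current from then on (see add_post below). A minimal invocation, assuming the default data/posts.db location baked into Trog::SQLite::TagIndex:

    perl bin/build_index.pl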

+ 0 - 0
migrate.pl → bin/migrate.pl


+ 13 - 2
lib/Trog/Data/FlatFile.pm

@@ -12,6 +12,9 @@ use File::Slurper;
 use File::Copy;
 use Mojo::File;
 
+use lib 'lib';
+use Trog::SQLite::TagIndex;
+
 use parent qw{Trog::DataModule};
 
 our $datastore = 'data/files';
@@ -28,14 +31,18 @@ You can only post once per second due to it storing each post as a file named af
 our $parser = JSON::MaybeXS->new();
 
 sub read ($self, $query={}) {
+    $query->{limit} //= 25;
+
     #Optimize direct ID
     my @index;
     if ($query->{id}) {
         @index = ("$datastore/$query->{id}");
     } else {
-        @index = $self->_index();
+        if (-f 'data/posts.db') {
+            @index = map { "$datastore/$_" } Trog::SQLite::TagIndex::posts_for_tags($query->{limit}, @{$query->{tags} // []});
+        }
+        @index = $self->_index() unless @index;
     }
-    $query->{limit} //= 25;
 
     my @items;
     foreach my $item (@index) {
@@ -51,6 +58,7 @@ sub read ($self, $query={}) {
         my @filtered = $self->filter($query,@$parsed);
 
         push(@items,@filtered) if @filtered;
+        next if $query->{limit} == 0; # 0 = unlimited
         last if scalar(@items) == $query->{limit};
     }
 
@@ -79,6 +87,8 @@ sub write($self,$data) {
         open(my $fh, '>', $file) or confess;
         print $fh $parser->encode($update);
         close $fh;
+
+        Trog::SQLite::TagIndex::add_post($post,$self);
     }
 }
 
@@ -100,6 +110,7 @@ sub add ($self,@posts) {
 sub delete($self, @posts) {
     foreach my $update (@posts) {
         unlink "$datastore/$update->{id}" or confess;
+        Trog::SQLite::TagIndex::remove_post($update);
     }
     return 0;
 }
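
Taken together, read() now short-circuits through the SQLite index whenever data/posts.db exists: tags are resolved to post IDs in SQLite, and only the matching flat files are parsed; without the database it falls back to the full _index() scan. A hedged sketch of the caller-visible behavior (the tag name is illustrative; read() returns an arrayref, as build_index() relies on below):

    use lib 'lib';
    use Trog::Config;
    use Trog::Data;

    my $data  = Trog::Data->new(Trog::Config::get());
    # With the index present, this parses at most 10 files from data/files/
    my $posts = $data->read({ tags => ['blog'], limit => 10 });
    print "$_->{title}\n" for @$posts;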

+ 1 - 2
lib/Trog/DataModule.pm

@@ -3,6 +3,7 @@ package Trog::DataModule;
 use strict;
 use warnings;
 
+use UUID::Tiny;
 use List::Util;
 use File::Copy;
 use Mojo::File;
@@ -132,7 +133,6 @@ sub filter ($self, $query, @filtered) {
     @filtered = grep { $_->{title} =~ m/\Q$request{like}\E/i || $_->{data} =~ m/\Q$request{like}\E/i } @filtered if $request{like};
 
     @filtered = grep { $_->{user} eq $request{author} } @filtered if $request{author};
-
     return @filtered;
 }
 
@@ -223,7 +223,6 @@ You probably won't want to override this.
 =cut
 
 sub add ($self, @posts) {
-    require UUID::Tiny;
     my @to_write;
     foreach my $post (@posts) {
         $post->{id} //= UUID::Tiny::create_uuid_as_string(UUID::Tiny::UUID_V1, UUID::Tiny::UUID_NS_DNS);
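
With UUID::Tiny loaded up front, add() assigns IDs without paying a lazy require on the first post. Those IDs double as the flat-file names and as the post_id values in the new index. The call is the same one used above:

    use UUID::Tiny;

    # Time-based (version 1) UUID; FlatFile uses these strings as file names
    my $id = UUID::Tiny::create_uuid_as_string(UUID::Tiny::UUID_V1, UUID::Tiny::UUID_NS_DNS);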

+ 44 - 0
lib/Trog/SQLite.pm

@@ -3,6 +3,11 @@ package Trog::SQLite;
 use strict;
 use warnings;
 
+no warnings 'experimental';
+use feature qw{signatures};
+
+use POSIX qw{floor};
+
 use DBI;
 use DBD::SQLite;
 use File::Slurper qw{read_text};
@@ -21,4 +26,43 @@ sub dbh {
     return $db;
 }
 
+=head2 bulk_insert(DBI $dbh, STRING $table, ARRAYREF $keys, STRING $ACTION='IGNORE', MIXED @values)
+
+Insert the values into the specified table with the provided keys, using INSERT OR $ACTION (IGNORE by default; pass REPLACE to upsert).
+Values are consumed as N-tuples based on the number and ordering of the keys.
+
+Essentially works around SQLite's 999 bound-parameter limit per statement, and executes by re-using prepared statements.
+This results in a quick insert/update of lots of data, such as when building an index or importing data.
+
+Dies on failure.
+
+Doesn't escape the table name or keys, so don't be a maroon and let users pass data to those.
+
+=cut
+
+sub bulk_insert ($dbh, $table, $keys, $ACTION='IGNORE', @values) {
+    die "keys must be nonempty ARRAYREF" unless ref $keys eq 'ARRAY' && @$keys;
+    die "#Values must be a multiple of #keys" if @values % @$keys;
+
+    my $nkeys = scalar(@$keys);
+    # SQLite allows at most 999 bound parameters per statement;
+    # batch in the largest multiple of #keys that fits under that cap.
+    my $limit = $nkeys * floor( 999 / $nkeys );
+    my ($smt,$query) = ('','');
+    while (@values) {
+        $smt = '' if scalar(@values) < $limit; # re-prepare for the final, smaller batch
+        my @params = splice(@values,0,$limit);
+        if (!$smt) {
+            my @value_tuples;
+            my @placeholders = map { '?' } @params;
+            while (@placeholders) {
+                push(@value_tuples, "(".join(',',(splice(@placeholders,0,$nkeys))).")");
+            }
+            $query = "INSERT OR $ACTION INTO $table (".join(',',@$keys).") VALUES ".join(',',@value_tuples);
+            $smt = $dbh->prepare($query);
+        }
+        $smt->execute(@params);
+    }
+}
+
 1;
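
A hedged usage sketch with hypothetical tuple data; note that the values arrive as one flat list whose length must be a multiple of the key count:

    use lib 'lib';
    use Trog::SQLite;

    my $dbh = Trog::SQLite::dbh('schema/flatfile.schema', 'data/posts.db');
    # Three (post_id, tag_id) pairs, flattened; long lists are split into
    # batches automatically to stay under SQLite's bound-parameter cap.
    Trog::SQLite::bulk_insert($dbh, 'posts_index', [qw{post_id tag_id}], 'IGNORE',
        'uuid-a', 1,
        'uuid-a', 2,
        'uuid-b', 1,
    );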

+ 65 - 0
lib/Trog/SQLite/TagIndex.pm

@@ -0,0 +1,65 @@
+package Trog::SQLite::TagIndex;
+
+use strict;
+use warnings;
+
+no warnings 'experimental';
+use feature qw{signatures};
+
+use List::Util qw{uniq};
+use Trog::SQLite;
+
+=head1 Trog::SQLite::TagIndex
+
+An SQLite3 index of posts by tag.
+Used to speed up the flat-file data model.
+
+=head1 FUNCTIONS
+
+=cut
+
+sub posts_for_tags ($limit=0, @tags) {
+    my $dbh = _dbh();
+    my $clause = @tags ? "WHERE tag IN (".join(',' ,(map {'?'} @tags)).")" : '';
+    if ($limit) {
+        $clause .= " LIMIT ?";
+        push(@tags,$limit);
+    }
+    my $rows = $dbh->selectall_arrayref("SELECT DISTINCT id FROM posts $clause",{ Slice => {} }, @tags);
+    return () unless ref $rows eq 'ARRAY' && @$rows;
+    return map { $_->{id} } @$rows;
+}
+
+sub add_post ($post,$data_obj) {
+    my $dbh = _dbh();
+    return build_index($data_obj,[$post]);
+}
+
+sub remove_post ($post) {
+    my $dbh = _dbh();
+    return $dbh->do("DELETE FROM posts_index WHERE post_id=?", undef, $post->{id});
+}
+
+sub build_index($data_obj,$posts=[]) {
+    my $dbh = _dbh();
+    $posts = $data_obj->read({ limit => 0, acls => ['admin'] }) unless @$posts;
+
+    my @tags = uniq map { @{$_->{tags}} } @$posts;
+    Trog::SQLite::bulk_insert($dbh,'tag', ['name'], 'IGNORE', @tags); 
+    my $t = $dbh->selectall_hashref("SELECT id,name FROM tag", 'name');
+    foreach my $k (keys(%$t)) { $t->{$k} = $t->{$k}->{id} };
+
+    Trog::SQLite::bulk_insert($dbh,'posts_index',[qw{post_id tag_id}], 'IGNORE', map {
+        my $subj = $_;
+        map { ( $subj->{id}, $t->{$_} ) } @{$subj->{tags}}
+    } @$posts );
+}
+
+# Ensure the db schema is OK, and give us a handle
+sub _dbh {
+    my $file   = 'schema/flatfile.schema';
+    my $dbname = "data/posts.db";
+    return Trog::SQLite::dbh($file,$dbname);
+}
+
+1;
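
In short, build_index() makes two bulk_insert passes: one upserting every distinct tag name into tag, and a second mapping each post's tags through the freshly selected name-to-id hash into posts_index rows. Reads then go through posts_for_tags(); an illustrative call (tag names hypothetical):

    use lib 'lib';
    use Trog::SQLite::TagIndex;

    # IDs of up to 10 posts carrying either tag; a limit of 0 means no limit
    my @ids = Trog::SQLite::TagIndex::posts_for_tags(10, qw{blog news});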

+ 13 - 0
schema/flatfile.schema

@@ -0,0 +1,13 @@
+CREATE TABLE IF NOT EXISTS tag (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    name TEXT NOT NULL UNIQUE
+);
+
+CREATE TABLE IF NOT EXISTS posts_index (
+    post_id TEXT NOT NULL,
+    tag_id INTEGER NOT NULL REFERENCES tag(id) ON DELETE CASCADE
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS post_tag_idx ON posts_index(post_id, tag_id);
+
+CREATE VIEW IF NOT EXISTS posts AS SELECT p.post_id as id, t.name AS tag FROM posts_index AS p JOIN tag AS t ON t.id=p.tag_id;
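
The posts view flattens the join so posts_for_tags() can select post IDs by tag name without restating it. A quick way to poke at the view by hand, reusing this commit's own handle helper (the tag name is hypothetical):

    use lib 'lib';
    use Trog::SQLite;

    my $dbh  = Trog::SQLite::dbh('schema/flatfile.schema', 'data/posts.db');
    # One row per (post id, tag name) pair exposed by the view
    my $rows = $dbh->selectall_arrayref(
        "SELECT id, tag FROM posts WHERE tag = ?", { Slice => {} }, 'blog'
    );
    print "$_->{id}\n" for @$rows;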