# TCMS.pm
  1. package TCMS;
  2. use strict;
  3. use warnings;
  4. no warnings 'experimental';
  5. use feature qw{signatures state};
  6. use Date::Format qw{strftime};
  7. use HTTP::Body ();
  8. use URL::Encode ();
  9. use Text::Xslate ();
  10. use Plack::MIME ();
  11. use Mojo::File ();
  12. use DateTime::Format::HTTP();
  13. use CGI::Cookie ();
  14. use File::Basename();
  15. use IO::Compress::Gzip();
  16. use Time::HiRes qw{gettimeofday tv_interval};
  17. use HTTP::Parser::XS qw{HEADERS_AS_HASHREF};
  18. use List::Util;
  19. use UUID::Tiny();
  20. #Grab our custom routes
  21. use lib 'lib';
  22. use Trog::Routes::HTML;
  23. use Trog::Routes::JSON;
  24. use Trog::Log qw{:all};
  25. use Trog::Auth;
  26. use Trog::Utils;
  27. use Trog::Config;
  28. use Trog::Data;
  29. use Trog::Vars;
  # Troglodyne philosophy - simple as possible

  # Load the configuration and the data object, then assemble the dispatch table.
  my $conf = Trog::Config::get();
  my $data = Trog::Data->new($conf);

  # Later sources win on key collisions: HTML routes first, then JSON routes,
  # then whatever routes the data object supplies.
  my %roots  = $data->routes();
  my %routes = ( %Trog::Routes::HTML::routes, %Trog::Routes::JSON::routes, %roots );

  # Alias path => actual path; consulted by app() before route lookup.
  my %aliases = $data->aliases();

  # XXX this is built progressively across the forks, leading to inconsistent behavior.
  # This should eventually be pre-filled from DB.
  my %etags;

  # Buffer size used when reading request bodies and files (roughly 1MB).
  my $CHUNK_SIZE = 1024000;

  # Boundary string for multipart/byteranges responses.
  my $CHUNK_SEP = 'tCMSep666YOLO42069';

  # Extension => MIME type for stuff that isn't in upstream finders.
  my %extra_types = (
      '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  );
=head2 app()

Dispatches requests based on %routes built above.

The dispatcher here does *not* do anything with the authn/authz data. It sets those in the 'user' and 'user_acls' parameters of the query object passed to routes.

If a path passed is not a defined route (or regex route), but exists as a file under www/, it will be served up immediately.

=cut
  # PSGI entry point: turn one request environment into a response.
  #
  # Takes the PSGI $env hashref. Builds a $query hashref from the query string
  # and (for POST) the request body, resolves the session cookie to a user and
  # that user's ACLs, then answers from, in order: a pre-rendered static under
  # www/statics/, a plain file under www/ (or totp/ for logged-in users), or the
  # matching entry in %routes.
  #
  # Returns whatever the chosen handler returns: a PSGI response arrayref or a
  # streaming coderef.
  sub app {

      # Start the server timing clock
      my $start = [gettimeofday];
      my $env   = shift;

      return _toolong() if length( $env->{REQUEST_URI} ) > 2048;

      # Check eTags.  If we don't know about it, just assume it's good and lazily fill the cache
      # XXX yes, this allows cache poisoning...but only for logged in users!
      if ( $env->{HTTP_IF_NONE_MATCH} ) {
          return [ 304, [], [''] ] if $env->{HTTP_IF_NONE_MATCH} eq ( $etags{ $env->{REQUEST_URI} } || '' );
          $etags{ $env->{REQUEST_URI} } = $env->{HTTP_IF_NONE_MATCH} unless exists $etags{ $env->{REQUEST_URI} };
      }

      # A client-supplied date must never be able to abort the request: whether
      # the parser throws or returns nothing on garbage, the header is ignored.
      my $last_fetch = 0;
      if ( $env->{HTTP_IF_MODIFIED_SINCE} ) {
          my $parsed = eval { DateTime::Format::HTTP->parse_datetime( $env->{HTTP_IF_MODIFIED_SINCE} ) };
          $last_fetch = $parsed->epoch() if $parsed;
      }

      #XXX Don't use statics anything that has a search query
      # On one hand, I don't want to DOS the disk, but I'd also like some like ?rss...
      # Should probably turn those into aliases.
      my $has_query = !!$env->{QUERY_STRING};

      my $query = {};
      $query = URL::Encode::url_params_mixed( $env->{QUERY_STRING} ) if $env->{QUERY_STRING};

      #Actually parse the POSTDATA and dump it into the QUERY object if this is a POST
      if ( $env->{REQUEST_METHOD} eq 'POST' ) {
          my $body = HTTP::Body->new( $env->{CONTENT_TYPE}, $env->{CONTENT_LENGTH} );
          while ( $env->{'psgi.input'}->read( my $buf, $CHUNK_SIZE ) ) {
              $body->add($buf);
          }
          @$query{ keys( %{ $body->param } ) }  = values( %{ $body->param } );
          @$query{ keys( %{ $body->upload } ) } = values( %{ $body->upload } );
      }

      # Grab the list of ACLs we want to add to a post, if any.
      $query->{acls} = [ $query->{acls} ] if ( $query->{acls} && ref $query->{acls} ne 'ARRAY' );

      my $path = $env->{PATH_INFO};
      $path = '/index' if $path eq '/';

      # Translate alias paths into their actual path
      $path = $aliases{$path} if exists $aliases{$path};

      # Figure out if we want compression or not
      my $alist = $env->{HTTP_ACCEPT_ENCODING} || '';
      $alist =~ s/\s//g;
      my @accept_encodings = split( /,/, $alist );
      my $deflate          = grep { 'gzip' eq $_ } @accept_encodings;

      # Collapse multiple slashes in the path
      $path =~ s/[\/]+/\//g;

      # Let's open up our default route before we bother to see if users even exist
      return $routes{default}{callback}->($query) unless -f "config/setup";

      my $cookies = {};
      if ( $env->{HTTP_COOKIE} ) {
          $cookies = CGI::Cookie->parse( $env->{HTTP_COOKIE} );
      }

      # Set the IP of the request so we can fail2ban
      $Trog::Log::ip = $env->{HTTP_X_FORWARDED_FOR} || $env->{REMOTE_ADDR};

      my $active_user = '';
      $Trog::Log::user = 'nobody';
      if ( exists $cookies->{tcmslogin} ) {
          $active_user = Trog::Auth::session2user( $cookies->{tcmslogin}->value );
          $Trog::Log::user = $active_user if $active_user;
      }
      $query->{user_acls} = [];
      $query->{user_acls} = Trog::Auth::acls4user($active_user) // [] if $active_user;

      # Log the request.
      Trog::Log::uuid( UUID::Tiny::create_uuid_as_string( UUID::Tiny::UUID_V1, UUID::Tiny::UUID_NS_DNS ) );
      INFO("$env->{REQUEST_METHOD} $path");

      # Filter out passed ACLs which are naughty
      my $is_admin = grep { $_ eq 'admin' } @{ $query->{user_acls} };
      @{ $query->{acls} } = grep { $_ ne 'admin' } @{ $query->{acls} } unless $is_admin;

      # Disallow any paths that are naughty ( starman auto-removes .. up-traversal)
      if ( index( $path, '/templates' ) == 0 || index( $path, '/statics' ) == 0 || $path =~ m/.*(\.psgi|\.pm)$/i ) {
          return _forbidden($query);
      }

      my $streaming = $env->{'psgi.streaming'};
      $query->{streaming} = $streaming;

      # If we have a static render, just use it instead (These will ALWAYS be correct, data saves invalidate this)
      # TODO: make this key on admin INSTEAD of active user when we add non-admin users.
      $query->{start} = $start;
      if ( !$active_user && !$has_query ) {
          return _static( "$path.z", $start, $streaming ) if -f "www/statics/$path.z" && $deflate;
          return _static( $path,     $start, $streaming ) if -f "www/statics/$path";
      }

      # Handle HTTP range/streaming requests.
      # Declared unconditionally: "my $x = ... if COND" has undefined behaviour
      # when COND is false.
      my $range = '';
      if ( $env->{HTTP_RANGE} || $env->{HTTP_IF_RANGE} ) {
          $range = $env->{HTTP_RANGE} || "bytes=0-";
      }
      my @ranges;
      if ($range) {
          $range =~ s/bytes=//g;

          # Each comma-separated spec becomes a [first, last] pair; a missing
          # end offset is filled in later by _range().
          push( @ranges, map { [ split( /-/, $_ ) ] } split( /,/, $range ) );
      }

      return _serve( "www/$path",  $start, $streaming, \@ranges, $last_fetch, $deflate ) if -f "www/$path";
      return _serve( "totp/$path", $start, $streaming, \@ranges, $last_fetch, $deflate ) if -f "totp/$path" && $active_user;

      #Handle regex/capture routes
      if ( !exists $routes{$path} ) {
          foreach my $pattern ( keys(%routes) ) {
              my @captures = $path =~ m/^$pattern$/;
              next unless @captures;

              # From here on the pattern itself is the route key; captured
              # values are stored under the field names the route declares.
              $path = $pattern;
              foreach my $field ( @{ $routes{$path}{captures} } ) {
                  $routes{$path}{data} //= {};
                  $routes{$path}{data}{$field} = shift @captures;
              }
              last;
          }
      }

      $query->{deflate} = $deflate;
      $query->{user}    = $active_user;

      # The existence test MUST come first: reading $routes{$path}{auth} for an
      # unknown path autovivifies $routes{$path}, which would make the 404
      # unreachable, grow %routes on every bad URL, and feed client-chosen
      # strings into the pattern loop above on later requests.
      return _notfound($query)   unless exists $routes{$path};
      return _forbidden($query)  if $routes{$path}{auth} && !$active_user;
      return _badrequest($query) unless grep { $env->{REQUEST_METHOD} eq $_ } ( $routes{$path}{method} || '', 'HEAD' );

      # Route-supplied data (including regex captures) overrides request parameters.
      @{$query}{ keys( %{ $routes{$path}{'data'} } ) } = values( %{ $routes{$path}{'data'} } ) if ref $routes{$path}{'data'} eq 'HASH' && %{ $routes{$path}{'data'} };

      #Set various things we don't want overridden
      $query->{body}         = '';
      $query->{dnt}          = $env->{HTTP_DNT};
      $query->{user}         = $active_user;
      $query->{domain}       = $env->{HTTP_X_FORWARDED_HOST} || $env->{HTTP_HOST};
      $query->{route}        = $path;
      $query->{scheme}       = $env->{'psgi.url_scheme'} // 'http';
      $query->{social_meta}  = 1;
      $query->{primary_post} = {};
      $query->{has_query}    = $has_query;

      # NOTE(review): strict refs is relaxed around the callback call; presumably
      # some routes store their callback by name rather than as a coderef - confirm.
      {
          no strict 'refs';
          my $output = $routes{$path}{callback}->($query);

          # Append server-timing headers
          my $tot = tv_interval($start) * 1000;
          push( @{ $output->[1] }, 'Server-Timing' => "app;dur=$tot" );
          return $output;
      }
  }
  # Produce the response for one of the canned error pages.
  #
  # $type  - one of 'notfound', 'forbidden', 'badrequest', 'toolong'.
  # $query - the query hashref; 'start' and 'streaming' are read from it.
  #
  # A pre-rendered copy under www/statics/ is preferred (compressed ".z"
  # variant first); otherwise the matching Trog::Routes::HTML handler is called
  # with $query and its return value passed through.
  #
  # NOTE(review): the ".z" variant is chosen without looking at whether the
  # client accepts gzip - confirm that is intended.
  sub _generic ( $type, $query ) {

      # Callers such as _toolong() pass a bare {}, so 'start' may be missing.
      # Fall back to "now" so _static() has a real timestamp for Server-Timing.
      my $start = $query->{start} // [gettimeofday];

      foreach my $candidate ( "$type.z", $type ) {
          return _static( $candidate, $start, $query->{streaming} ) if -f "www/statics/$candidate";
      }

      my %lookup = (
          notfound   => \&Trog::Routes::HTML::notfound,
          forbidden  => \&Trog::Routes::HTML::forbidden,
          badrequest => \&Trog::Routes::HTML::badrequest,
          toolong    => \&Trog::Routes::HTML::toolong,
      );
      return $lookup{$type}->($query);
  }
  # Response for a path that matches no route; delegates to _generic().
  sub _notfound ($query) {
      my $response = _generic( notfound => $query );
      return $response;
  }
  # Response for a request that is not permitted; delegates to _generic().
  sub _forbidden ($query) {
      my $response = _generic( forbidden => $query );
      return $response;
  }
  # Response for a request using a method the route does not accept;
  # delegates to _generic().
  sub _badrequest ($query) {
      my $response = _generic( badrequest => $query );
      return $response;
  }
  # Response for an over-long request URI. No query has been parsed at the
  # point this is used, so an empty one is handed to _generic().
  sub _toolong() {
      my $empty_query = {};
      return _generic( toolong => $empty_query );
  }
  # Serve a pre-rendered response stored under www/statics/.
  #
  # The stored file is a raw HTTP response: a header section, one empty line,
  # then the body.
  #
  # $path       - file name relative to www/statics/.
  # $start      - [gettimeofday] timestamp used for the Server-Timing header.
  # $streaming  - true if a streaming (coderef) response may be returned.
  # $last_fetch - epoch from If-Modified-Since; 0 (default) means "always send".
  #
  # Returns a PSGI arrayref or a streaming coderef. A file that cannot be
  # opened yields a plain-text 403. When the file is not newer than
  # $last_fetch, a body-less 304 is returned.
  sub _static ( $path, $start, $streaming, $last_fetch = 0 ) {

      # XXX because of psgi I can't just vomit the file directly
      open( my $fh, '<', "www/statics/$path" )
        or return [ 403, [ 'Content-Type' => $Trog::Vars::content_types{plain} ], ["STAY OUT YOU RED MENACE"] ];

      # Consume the header section. This deliberately leaves the file pointer at
      # the first byte of the body. A lexical is used so $_ is left untouched.
      my $headers = '';
      while ( my $line = <$fh> ) {
          last if $line eq "\n";
          $headers .= $line;
      }
      my ( undef, undef, $status, undef, $headers_parsed ) = HTTP::Parser::XS::parse_http_response( "$headers\n", HEADERS_AS_HASHREF );

      #XXX need to put this into the file itself
      my $mt         = ( stat($fh) )[9];
      my @gm         = gmtime($mt);
      my $now_string = strftime( "%a, %d %b %Y %H:%M:%S GMT", @gm );
      my $unchanged  = !( $mt > $last_fetch );
      my $code       = $unchanged ? 304 : $status;
      $headers_parsed->{"Last-Modified"} = $now_string;

      # Append server-timing headers
      my $tot = tv_interval($start) * 1000;
      $headers_parsed->{'Server-Timing'} = "static;dur=$tot";

      # A 304 must not carry a body, so stop before any of it is sent.
      if ($unchanged) {
          close $fh;
          return [ $code, [%$headers_parsed], [] ];
      }

      #XXX uwsgi just opens the file *again* when we already have a filehandle if it has a path.
      # starman by comparison doesn't violate the principle of least astonishment here.
      # This is probably a performance optimization, but makes the kind of micromanagement I need to do inconvenient.
      # As such, we will just return a stream.
      if ($streaming) {
          return sub {
              my $responder = shift;
              my $writer    = $responder->( [ $code, [%$headers_parsed] ] );
              while ( $fh->read( my $buf, $CHUNK_SIZE ) ) {
                  $writer->write($buf);
              }
              close $fh;
              $writer->close;
          };
      }

      return [ $code, [%$headers_parsed], $fh ];
  }
  # Build a 206 Partial Content response for one or more byte ranges.
  #
  # $fh      - open, seekable read handle on the file.
  # $ranges  - arrayref of [first, last] pairs as split from the Range header.
  #            Pairs are normalised IN PLACE: a missing end becomes the last
  #            byte, an end past EOF is clamped to the last byte, and the
  #            suffix form ("-N", i.e. an empty first offset) becomes the final
  #            N bytes.
  # $sz      - file size in bytes.
  # %headers - response headers; 'Content-type' is read and, for multi-range
  #            requests, replaced by the multipart/byteranges type.
  #
  # Returns a streaming coderef, or a 416 arrayref if any range is malformed
  # or starts at/after the end of the file.
  sub _range ( $fh, $ranges, $sz, %headers ) {

      # Set mode
      my $primary_ct   = "Content-Type: $headers{'Content-type'}";
      my $is_multipart = scalar(@$ranges) > 1;
      if ($is_multipart) {
          $headers{'Content-type'} = "multipart/byteranges; boundary=$CHUNK_SEP";
      }
      my $code = 206;

      # Normalise and validate every range before anything is sent, so that the
      # advertised Content-Length matches exactly what the loop below writes.
      foreach my $range (@$ranges) {
          my ( $first, $last ) = map { my $v = $_ // ''; $v =~ s/\s+//g; $v } @{$range}[ 0, 1 ];
          my $valid = 1;

          if ( $first eq '' ) {

              # Suffix form: "-N" means the last N bytes of the file.
              if ( $last =~ m/\A\d+\z/ && $last > 0 ) {
                  $first = List::Util::max( $sz - $last, 0 );
                  $last  = $sz - 1;
              }
              else {
                  $valid = 0;
              }
          }
          else {
              $last = $sz - 1 if $last eq '';
              if ( $first =~ m/\A\d+\z/ && $last =~ m/\A\d+\z/ ) {

                  # An end offset beyond EOF means "through the last byte".
                  $last = List::Util::min( $last, $sz - 1 );
              }
              else {
                  $valid = 0;
              }
          }

          # A range must begin inside the file and must not run backwards.
          $valid = 0 if $valid && ( $first >= $sz || $first > $last );

          if ( !$valid ) {
              $fh->close();
              return [ 416, [ %headers, 'Content-Range' => "bytes */$sz" ], ["Requested range not satisfiable"] ];
          }
          @$range = ( $first, $last );
      }

      # Per-part preamble used in multipart responses; $lead is the newline
      # that separates a part from the previous part's payload.
      my $part_header = sub ( $lead, $range ) {
          return "$lead--$CHUNK_SEP\n$primary_ct\nContent-Range: bytes $range->[0]-$range->[1]/$sz\n\n";
      };
      my $closing = "\n--${CHUNK_SEP}--\n";

      # Payload bytes, plus (for multipart) every preamble and the final delimiter.
      $headers{'Content-Length'} = List::Util::sum( 0, map { $_->[1] + 1 - $_->[0] } @$ranges );
      if ($is_multipart) {
          my $lead = '';
          foreach my $range (@$ranges) {
              $headers{'Content-Length'} += length( $part_header->( $lead, $range ) );
              $lead = "\n";
          }
          $headers{'Content-Length'} += length($closing);
      }

      return sub {
          my $responder = shift;
          my $writer;
          my $lead = '';
          foreach my $range (@$ranges) {
              my ( $first, $last ) = @$range;
              $headers{'Content-Range'} = "bytes $first-$last/$sz" unless $is_multipart;

              # Headers go out once, on the first range.
              $writer //= $responder->( [ $code, [%headers] ] );
              $writer->write( $part_header->( $lead, $range ) ) if $is_multipart;
              $lead = "\n";

              my $remaining = $last + 1 - $first;
              $fh->seek( $first, 0 );
              while ( $remaining > 0 ) {
                  my $got = $fh->read( my $buf, List::Util::min( $remaining, $CHUNK_SIZE ) );

                  # Stop on EOF or error rather than spinning on an empty buffer.
                  last unless $got;
                  $writer->write($buf);

                  # Adjust for the amount actually read, which may be short.
                  $remaining -= $got;
              }
          }
          $fh->close();
          $writer->write($closing) if $is_multipart;
          $writer->close;
      };
  }
  # Serve a file from disk.
  #
  # $path       - path of the file to send.
  # $start      - [gettimeofday] timestamp used for the Server-Timing header.
  # $streaming  - true if a streaming (coderef) response may be returned.
  # $ranges     - arrayref of [first, last] byte ranges; honoured only when
  #               $streaming is true.
  # $last_fetch - epoch from If-Modified-Since; 0 (default) means "always send".
  # $deflate    - true if the client accepts gzip.
  #
  # Returns a PSGI arrayref or a streaming coderef:
  #   * 403 if the file cannot be opened,
  #   * 304 with no body if the file is not newer than $last_fetch,
  #   * a ranged response from _range() when ranges were requested,
  #   * a chunked stream for files larger than $CHUNK_SIZE,
  #   * otherwise the whole file, gzip-compressed when $deflate is set.
  sub _serve ( $path, $start, $streaming, $ranges, $last_fetch = 0, $deflate = 0 ) {
      my $mf  = Mojo::File->new($path);
      my $ext = '.' . $mf->extname();

      # Upstream MIME table first, then our own extras, then plain text.
      my $ft = Plack::MIME->mime_type($ext) || $extra_types{$ext} || $Trog::Vars::content_types{plain};

      my $ct      = 'Content-type';
      my @headers = ( $ct => $ft );

      #TODO use static Cache-Control for everything but JS/CSS?
      push( @headers, 'Cache-control' => $Trog::Vars::cache_control{revalidate} );
      push( @headers, 'Accept-Ranges' => 'bytes' );

      my ( $sz, $mt ) = ( stat($path) )[ 7, 9 ];
      my @gm          = gmtime($mt);
      my $now_string  = strftime( "%a, %d %b %Y %H:%M:%S GMT", @gm );
      push( @headers, "Last-Modified" => $now_string );
      push( @headers, 'Vary'          => 'Accept-Encoding' );

      open( my $fh, '<', $path )
        or return [ 403, [ $ct => $Trog::Vars::content_types{plain} ], ["STAY OUT YOU RED MENACE"] ];

      # A 304 must not carry a body; answer before any branch that would send one.
      if ( !( $mt > $last_fetch ) ) {
          close $fh;
          return [ 304, \@headers, [] ];
      }
      my $code = 200;

      return _range( $fh, $ranges, $sz, @headers ) if @$ranges && $streaming;

      # Large files are streamed in $CHUNK_SIZE pieces and never compressed.
      if ( $streaming && $sz > $CHUNK_SIZE ) {
          return sub {
              my $responder = shift;
              push( @headers, 'Content-Length' => $sz );
              my $writer = $responder->( [ $code, \@headers ] );
              while ( $fh->read( my $buf, $CHUNK_SIZE ) ) {
                  $writer->write($buf);
              }
              close $fh;
              $writer->close;
          };
      }

      # Hand the uncompressed file handle straight to the server.
      my $send_plain = sub {
          push( @headers, "Content-Length" => $sz );

          # Append server-timing headers
          my $tot = tv_interval($start) * 1000;
          push( @headers, 'Server-Timing' => "file;dur=$tot" );
          return [ $code, \@headers, $fh ];
      };

      #Return data in the event the caller does not support deflate
      return $send_plain->() if !$deflate;

      #Compress everything less than 1MB
      my $dfh;
      if ( !IO::Compress::Gzip::gzip( $fh => \$dfh ) ) {

          # Compression failed: report it on STDERR and fall back to the raw
          # file instead of sending an undefined body and Content-Length.
          warn "gzip of $path failed: $IO::Compress::Gzip::GzipError\n";
          seek( $fh, 0, 0 );
          return $send_plain->();
      }
      push( @headers, "Content-Encoding" => "gzip" );
      push( @headers, "Content-Length"   => length($dfh) );

      # Append server-timing headers
      my $tot = tv_interval($start) * 1000;
      push( @headers, 'Server-Timing' => "file;dur=$tot" );
      return [ $code, \@headers, [$dfh] ];
  }
  353. 1;