
Fix #288: Dynamically generate robots.txt

George Baugh, 2 years ago
parent commit
67676fbcd6
4 changed files with 38 additions and 30 deletions
  1. lib/TCMS.pm (+33, -0)
  2. lib/Trog/Routes/HTML.pm (+0, -25)
  3. lib/Trog/Routes/JSON.pm (+1, -0)
  4. www/templates/text/robots.tx (+4, -5)

+ 33 - 0
lib/TCMS.pm

@@ -47,6 +47,13 @@ my %routes = %Trog::Routes::HTML::routes;
 @routes{ keys(%Trog::Routes::JSON::routes) } = values(%Trog::Routes::JSON::routes);
 @routes{ keys(%roots) }                      = values(%roots);
 
+# Add in global routes, here because they *must* know about all other routes
+# Also, nobody should ever override these.
+$routes{'/robots.txt'} = {
+    method   => 'GET',
+    callback => \&robots,
+};
+
 my %aliases = $data->aliases();
 
 # XXX this is built progressively across the forks, leading to inconsistent behavior.
@@ -277,6 +284,32 @@ sub app {
     }
 }
 
+=head2 robots
+
+Return an appropriate robots.txt
+
+This is a "special" route as it needs to know about all the routes in order to disallow noindex=1 routes.
+
+=cut
+
+sub robots ($query) {
+    state $etag = "robots-" . time();
+    # If there's a 'capture' route, we need to format it correctly.
+    state @banned = map { exists $routes{$_}{robot_name} ? $routes{$_}{robot_name} : $_ } grep { $routes{$_}{noindex} } sort keys(%routes);
+
+    return Trog::Renderer->render(
+        contenttype => 'text/plain',
+        template => 'robots.tx',
+        data => {
+            etag   => $etag,
+            banned => \@banned,
+            %$query,
+        },
+        code => 200,
+    );
+}
+
+
 sub _generic ( $type, $query ) {
     return _static( "$type.z", $query->{start}, $query->{streaming} ) if -f "www/statics/$type.z";
     return _static( $type,     $query->{start}, $query->{streaming} ) if -f "www/statics/$type";

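For reference (not part of the diff), a minimal standalone sketch of what that map/grep pipeline yields, using hypothetical route entries (only the noindex and robot_name keys matter here):

    #!/usr/bin/env perl
    use strict;
    use warnings;
    use feature 'say';

    # Hypothetical route entries, loosely modeled on the routes touched in this commit.
    my %routes = (
        '/index' => { method => 'GET' },
        '/login' => { method => 'GET', noindex => 1 },
        '/auth_change_request/:token' => {
            method     => 'GET',
            noindex    => 1,
            robot_name => '/api/auth_change_request/*',
        },
    );

    # Same transform as the new robots() sub: keep only noindex routes, and
    # substitute robot_name when a capture route cannot appear verbatim in robots.txt.
    my @banned = map  { exists $routes{$_}{robot_name} ? $routes{$_}{robot_name} : $_ }
                 grep { $routes{$_}{noindex} }
                 sort keys %routes;

    say for @banned;    # /api/auth_change_request/*
                        # /login

Routes with captures (like the :token one above) would otherwise leak a placeholder into robots.txt, hence the robot_name override.
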
+ 0 - 25
lib/Trog/Routes/HTML.pm

@@ -48,7 +48,6 @@ our $categorybar  = 'categories.tx';
 our %routes = (
     default => {
         callback => \&Trog::Routes::HTML::setup,
-        noindex  => 1,
     },
     '/index' => {
         method   => 'GET',
@@ -192,10 +191,6 @@ our %routes = (
         data     => { xml => 1, compressed => 1 },
         captures => ['map'],
     },
-    '/robots.txt' => {
-        method   => 'GET',
-        callback => \&Trog::Routes::HTML::robots,
-    },
     '/humans.txt' => {
         method   => 'GET',
         callback => \&Trog::Routes::HTML::posts,
@@ -428,26 +423,6 @@ sub see_also ($to) {
 
 These are expected to either return a 200, or redirect to something which does.
 
-=head2 robots
-
-Return an appropriate robots.txt
-
-=cut
-
-#TODO make this dynamic based on routes with the noindex=1 flag (they'll never see anything behind /auth)
-sub robots ($query) {
-    state $etag = "robots-" . time();
-    return Trog::Renderer->render(
-        contenttype => 'text/plain',
-        template => 'robots.tx',
-        data => {
-            etag   => $etag,
-            %$query,
-        },
-        code => 200,
-    );
-}
-
 =head2 setup
 
 One time setup page; should only display to the first user to visit the site which we presume to be the administrator.

+ 1 - 0
lib/Trog/Routes/JSON.pm

@@ -40,6 +40,7 @@ our %routes = (
         callback   => \&process_auth_change_request,
         captures   => ['token'],
         noindex    => 1,
+        robot_name => '/api/auth_change_request/*',
     },
 );
 

+ 4 - 5
www/templates/text/robots.tx

@@ -1,10 +1,9 @@
+# robots.txt for <: $domain :>
 User-agent: *
 Sitemap: http://<: $domain :>/sitemap_index.xml.gz
-Disallow: /login
-Disallow: /auth
-Disallow: /request_password_reset
-Disallow: /api
-Disallow: /json
+<: for $banned -> $route { -:>
+Disallow: <: $route :>
+<: } -:>
 Disallow: /themes
 Disallow: /img
 Disallow: /templates
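
For illustration (again outside the diff), the rendered robots.txt would look roughly like this for a hypothetical domain, assuming /login is among the routes flagged noindex => 1:

    # robots.txt for example.com
    User-agent: *
    Sitemap: http://example.com/sitemap_index.xml.gz
    Disallow: /api/auth_change_request/*
    Disallow: /login
    Disallow: /themes
    Disallow: /img
    Disallow: /templates

Since both $etag and @banned are state variables, they are computed once per worker process; routes added or re-flagged later are only picked up on restart.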