Quellcode durchsuchen

Possible fix to #8

George S. Baugh vor 4 Jahren
Ursprung
Commit
058284e476
6 geänderte Dateien mit 38 neuen und 9 gelöschten Zeilen
  1. 7 0
      Changes
  2. 3 1
      at/sanity.test
  3. 1 0
      at/test.html
  4. 1 1
      dist.ini
  5. 18 1
      lib/Selenium/Client.pm
  6. 8 6
      lib/Selenium/Specification.pm

+ 7 - 0
Changes

@@ -1,5 +1,12 @@
 Revision history for Selenium-Client
 
+1.04 2021-04-?? TEODESIAN
+    [BUG FIXES]
+    - Handle UTF-8 content correctly
+    [NEW FEATURES]
+    - Automatically normalize data returned by the selenium server, and add normalize parameter to constructor
+    - Add advice on proper UTF-8 handling in callers
+
 1.03 2021-04-12 TEODESIAN
     [BUG FIXES]
     - Don't clobber $? in destructor

+ 3 - 1
at/sanity.test

@@ -2,12 +2,13 @@
 
 use strict;
 use warnings;
+use utf8;
 
 use Test2::V0;
 
 #XXX Test2 Knows Better TM
 no warnings 'experimental';
-use feature qw/signatures/;
+use feature qw/signatures unicode_strings/;
 
 use Test::Fatal;
 use FindBin;
@@ -131,6 +132,7 @@ foreach my $browser (@browsers) {
             is( exception { $session->SwitchToWindow( handle => $handle ) }, undef, "Can switch to old window");
 
             like($session->GetPageSource(), qr/Howdy/i, "Switched window correctly");
+            like($session->GetPageSource(), qr/🥰/i, "Unicode handled properly");
             $session->SwitchToWindow( handle => $newhandle );
             is( exception { $session->CloseWindow() }, undef, "CloseWindow closes current window context");
             $session->SwitchToWindow( handle => $handle );

+ 1 - 0
at/test.html

@@ -20,6 +20,7 @@
         <h1>
             Howdy Howdy Howdy
         </h1>
+        <p>Unicode Works 🥰</p>
         <form id="howIsBabbyFormed" action="other.html">
             <label for="text" class="red">Text</label>
             <input name="text" title="default" type="text" value="default"></input>

+ 1 - 1
dist.ini

@@ -1,5 +1,5 @@
 name = Selenium-Client
-version = 1.03
+version = 1.04
 author = George S. Baugh <george@troglodyne.net>
 license = MIT
 copyright_holder = George S. Baugh

+ 18 - 1
lib/Selenium/Client.pm

@@ -21,6 +21,7 @@ use File::Spec();
 use Sub::Install();
 use Net::EmptyPort();
 use Capture::Tiny qw{capture_merged};
+use Unicode::Normalize qw{NFC};
 
 use Selenium::Specification;
 
@@ -79,6 +80,12 @@ Only turn this off when you are debugging.
 
 Default: true
 
+=item C<normalize> BOOLEAN - Automatically normalize UTF-8 output using Normalization Form C (NFC).
+
+If another normal form is preferred, you should turn this off and directly use L<Unicode::Normalize>.
+
+Default: true
+
 =item C<post_callbacks> ARRAY[CODE] - Executed after each request to the selenium server.
 
 Callbacks are passed $self, an HTTP::Tiny response hashref and the request hashref.
@@ -144,6 +151,7 @@ sub new($class,%options) {
     $options{auto_close} //= 1;
     $options{browser}    //= '';
     $options{headless}   //= 1;
+    $options{normalize}  //= 1;
 
     #create client_dir and log-dir
     my $dir = File::Spec->catdir( $options{client_dir},"perl-client" );
@@ -457,7 +465,10 @@ sub _request($self, $method, %params) {
 
     print "$res->{status} : $res->{content}\n" if $self->{debug} && ref $res eq 'HASH';
 
-    my $decoded_content = eval { JSON::MaybeXS::decode_json($res->{content}) };
+    # all the selenium servers are UTF-8
+    my $normal = $res->{content};
+    $normal = NFC( $normal ) if $self->{normalize};
+    my $decoded_content = eval { JSON::MaybeXS->new()->utf8()->decode( $normal ) };
     confess "$res->{reason} :\n Consult $subject->{href}\nRaw Error:\n$res->{content}\n" unless $res->{success};
 
     if (grep { $method eq $_ } @no_process) {
@@ -648,6 +659,12 @@ Don't close this or your test will fail for obvious reasons.
 This also means that if you have to send ^C (SIGINT) to your script or exit() prematurely, said window may be left dangling,
 as these behave a lot more like POSIX::_exit() does on unix systems.
 
+=head1 UTF-8 considerations
+
+The JSON responses from the selenium server are decoded as UTF-8, as per the Selenium standard.
+As a convenience, we automatically apply NFC to output via L<Unicode::Normalize>, which can be disabled by passing normalize=0 to the constructor.
+If you are comparing output from selenium calls against UTF-8 glyphs, `use utf8`, `use feature qw{unicode_strings}` and normalization are strongly suggested.
+
 =head1 AUTHOR
 
 George S. Baugh <george@troglodyne.net>

+ 8 - 6
lib/Selenium/Specification.pm

@@ -8,7 +8,7 @@ use warnings;
 use v5.28;
 
 no warnings 'experimental';
-use feature qw/signatures/;
+use feature qw/signatures unicode_strings/;
 
 use List::Util qw{uniq};
 use HTML::Parser();
@@ -19,6 +19,8 @@ use DateTime::Format::HTTP();
 use HTTP::Tiny();
 use File::Path qw{make_path};
 use File::Spec();
+use Encode qw{decode};
+use Unicode::Normalize qw{NFC};
 
 #TODO make a JSONWire JSON spec since it's not changing
 
@@ -56,8 +58,8 @@ sub read($client_dir, $type='stable', $nofetch=1) {
     my $file =  File::Spec->catfile( "$dir","$type.json");
     fetch( once => $nofetch, dir => $dir );
     die "could not write $file: $@" unless -f $file;
-    my $buf = File::Slurper::read_text($file);
-    my $array = JSON::MaybeXS::decode_json($buf);
+    my $buf = File::Slurper::read_binary($file);
+    my $array = JSON::MaybeXS->new()->utf8()->decode($buf);
     my %hash;
     @hash{map { $_->{name} } @$array} = @$array;
     return \%hash;
@@ -108,8 +110,8 @@ sub fetch (%options) {
 
 
 sub _write_spec ($spec, $file) {
-    my $spec_json = JSON::MaybeXS::encode_json($spec);
-    return File::Slurper::write_text($file, $spec_json);
+    my $spec_json = JSON::MaybeXS->new()->utf8()->encode($spec);
+    return File::Slurper::write_binary($file, $spec_json);
 }
 
 sub _build_spec($last_modified, %spec) {
@@ -121,7 +123,7 @@ sub _build_spec($last_modified, %spec) {
         return 'cache' if $modified < $last_modified;
     }
 
-    my $html = $page->{content};
+    my $html = NFC( decode('UTF-8', $page->{content}) );
 
     $parse = [];
     %state = ( id => $spec{section_id} );