Specification.pm 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. package Selenium::Specification;
  2. # ABSTRACT: Module for building a machine readable specification for Selenium
  3. use strict;
  4. use warnings;
  5. use v5.28;
  6. no warnings 'experimental';
  7. use feature qw/signatures/;
  8. use List::Util qw{uniq};
  9. use HTML::Parser();
  10. use JSON::MaybeXS();
  11. use File::HomeDir();
  12. use File::Slurper();
  13. use DateTime::Format::HTTP();
  14. use HTTP::Tiny();
  15. use File::Path qw{make_path};
  16. use File::Spec();
  17. #TODO make a JSONWire JSON spec since it's not changing
  18. # URLs and the container ID
  19. our %spec_urls = (
  20. unstable => {
  21. url => 'https://w3c.github.io/webdriver/',
  22. section_id => 'endpoints',
  23. },
  24. draft => {
  25. url => "https://www.w3.org/TR/webdriver2/",
  26. section_id => 'endpoints',
  27. },
  28. stable => {
  29. url => "https://www.w3.org/TR/webdriver1/",
  30. section_id => 'list-of-endpoints',
  31. },
  32. );
  33. our $browser = HTTP::Tiny->new();
  34. my %state;
  35. my $parse = [];
  36. our $method = {};
  37. =head1 SUBROUTINES
  38. =head2 read($type STRING, $nofetch BOOL)
  39. Reads the copy of the provided spec type, and fetches it if a cached version is not available.
  40. =cut
  41. sub read($client_dir, $type='stable', $nofetch=1) {
  42. my $dir = File::Spec->catdir( $client_dir,"specs" );
  43. my $file = File::Spec->catfile( "$dir","$type.json");
  44. fetch( once => $nofetch, dir => $dir );
  45. die "could not write $file: $@" unless -f $file;
  46. my $buf = File::Slurper::read_text($file);
  47. my $array = JSON::MaybeXS::decode_json($buf);
  48. my %hash;
  49. @hash{map { $_->{name} } @$array} = @$array;
  50. return \%hash;
  51. }
  52. =head2 fetch(%OPTIONS HASH)
  53. Builds a spec hash based upon the WC3 specification documents, and writes it to disk.
  54. =cut
  55. #TODO needs to grab args and argtypes still
  56. sub fetch (%options) {
  57. my $dir = $options{dir};
  58. my $rc = 0;
  59. foreach my $spec ( sort keys(%spec_urls) ) {
  60. make_path( $dir ) unless -d $dir;
  61. my $file = File::Spec->catfile( "$dir","$spec.json");
  62. my $last_modified = -f $file ? (stat($file))[9] : undef;
  63. if ($options{once} && $last_modified) {
  64. print STDERR "Skipping fetch, using cached result" if $options{verbose};
  65. next;
  66. }
  67. $last_modified = 0 if $options{force};
  68. my $spc = _build_spec($last_modified, %{$spec_urls{$spec}});
  69. if (!$spc) {
  70. print STDERR "Could not retrieve $spec_urls{$spec}{url}, skipping" if $options{verbose};
  71. $rc = 1;
  72. next;
  73. }
  74. # Second clause is for an edge case -- if the header is not set for some bizarre reason we should obey force still
  75. if (ref $spc ne 'ARRAY' && $last_modified) {
  76. print STDERR "Keeping cached result '$file', as page has not changed since last fetch.\n" if $options{verbose};
  77. next;
  78. }
  79. _write_spec($spc, $file);
  80. print "Wrote $file\n" if $options{verbose};
  81. }
  82. return $rc;
  83. }
  84. sub _write_spec ($spec, $file) {
  85. my $spec_json = JSON::MaybeXS::encode_json($spec);
  86. return File::Slurper::write_text($file, $spec_json);
  87. }
  88. sub _build_spec($last_modified, %spec) {
  89. my $page = $browser->get($spec{url});
  90. return unless $page->{success};
  91. if ($page->{headers}{'last-modified'} && $last_modified ) {
  92. my $modified = DateTime::Format::HTTP->parse_datetime($page->{headers}{'last-modified'})->epoch();
  93. return 'cache' if $modified < $last_modified;
  94. }
  95. my $html = $page->{content};
  96. $parse = [];
  97. %state = ( id => $spec{section_id} );
  98. my $parser = HTML::Parser->new(
  99. handlers => {
  100. start => [\&_handle_open, "tagname,attr"],
  101. end => [\&_handle_close, "tagname"],
  102. text => [\&_handle_text, "text"],
  103. }
  104. );
  105. $parser->parse($html);
  106. # Now that we have parsed the methods, let us go ahead and build the argspec based on the anchors for each endpoint.
  107. foreach my $m (@$parse) {
  108. $method = $m;
  109. %state = ();
  110. my $mparser = HTML::Parser->new(
  111. handlers => {
  112. start => [\&_endpoint_open, "tagname,attr"],
  113. end => [\&_endpoint_close, "tagname"],
  114. text => [\&_endpoint_text, "text"],
  115. },
  116. );
  117. $mparser->parse($html);
  118. }
  119. return _fixup(\%spec,$parse);
  120. }
  121. sub _fixup($spec,$parse) {
  122. @$parse = map {
  123. $_->{href} = "$spec->{url}$_->{href}";
  124. #XXX correct TYPO in the spec
  125. $_->{uri} =~ s/{sessionid\)/{sessionid}/g;
  126. @{$_->{output_params}} = grep { $_ ne 'null' } uniq @{$_->{output_params}};
  127. $_
  128. } @$parse;
  129. return $parse;
  130. }
  131. sub _handle_open($tag,$attr) {
  132. if ( $tag eq 'section' && ($attr->{id} || '') eq $state{id} ) {
  133. $state{active} = 1;
  134. return;
  135. }
  136. if ($tag eq 'tr') {
  137. $state{method} = 1;
  138. $state{headers} = [qw{method uri name}];
  139. $state{data} = {};
  140. return;
  141. }
  142. if ($tag eq 'td') {
  143. $state{heading} = shift @{$state{headers}};
  144. return;
  145. }
  146. if ($tag eq 'a' && $state{heading} && $attr->{href}) {
  147. $state{data}{href} = $attr->{href};
  148. }
  149. }
  150. sub _handle_close($tag) {
  151. if ($tag eq 'section') {
  152. $state{active} = 0;
  153. return;
  154. }
  155. if ($tag eq 'tr' && $state{active}) {
  156. if ($state{past_first}) {
  157. push(@$parse, $state{data});
  158. }
  159. $state{past_first} = 1;
  160. $state{method} = 0;
  161. return;
  162. }
  163. }
  164. sub _handle_text($text) {
  165. return unless $state{active} && $state{method} && $state{past_first} && $state{heading};
  166. $text =~ s/\s//gm;
  167. return unless $text;
  168. $state{data}{$state{heading}} .= $text;
  169. }
  170. # Endpoint parsers
  171. sub _endpoint_open($tag,$attr) {
  172. my $id = $method->{href};
  173. $id =~ s/^#//;
  174. if ($attr->{id} && $attr->{id} eq $id) {
  175. $state{active} = 1;
  176. }
  177. if ($tag eq 'ol') {
  178. $state{in_tag} = 1;
  179. }
  180. if ($tag eq 'dt' && $state{in_tag} && $state{last_tag} eq 'dl') {
  181. $state{in_dt} = 1;
  182. }
  183. if ($tag eq 'code' && $state{in_dt} && $state{in_tag} && $state{last_tag} eq 'dt') {
  184. $state{in_code} = 1;
  185. }
  186. $state{last_tag} = $tag;
  187. }
  188. sub _endpoint_close($tag) {
  189. return unless $state{active};
  190. if ($tag eq 'section') {
  191. $state{active} = 0;
  192. $state{in_tag} = 0;
  193. }
  194. if ($tag eq 'ol') {
  195. $state{in_tag} = 0;
  196. }
  197. if ($tag eq 'dt') {
  198. $state{in_dt} = 0;
  199. }
  200. if ($tag eq 'code') {
  201. $state{in_code} = 0;
  202. }
  203. }
  204. sub _endpoint_text($text) {
  205. if ($state{active} && $state{in_tag} && $state{in_code} && $state{in_dt} && $state{last_tag} eq 'code') {
  206. $method->{output_params} //= [];
  207. $text =~ s/\s//gm;
  208. push(@{$method->{output_params}},$text) if $text;
  209. }
  210. }
  211. 1;