Specification.pm 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. package Selenium::Specification;
  2. # ABSTRACT: Module for building a machine readable specification for Selenium
  3. use strict;
  4. use warnings;
  5. no warnings 'experimental';
  6. use feature qw/signatures/;
  7. use List::Util qw{uniq};
  8. use HTML::Parser();
  9. use JSON::MaybeXS();
  10. use File::HomeDir();
  11. use File::Slurper();
  12. use DateTime::Format::HTTP();
  13. use HTTP::Tiny();
  14. use File::Path qw{make_path};
  15. use File::Spec();
  16. #TODO make a JSONWire JSON spec since it's not changing
  17. # URLs and the container ID
  18. our %spec_urls = (
  19. unstable => {
  20. url => 'https://w3c.github.io/webdriver/',
  21. section_id => 'endpoints',
  22. },
  23. draft => {
  24. url => "https://www.w3.org/TR/webdriver2/",
  25. section_id => 'endpoints',
  26. },
  27. stable => {
  28. url => "https://www.w3.org/TR/webdriver1/",
  29. section_id => 'list-of-endpoints',
  30. },
  31. );
  32. our $browser = HTTP::Tiny->new();
  33. my %state;
  34. my $parse = [];
  35. my $dir = File::Spec->catdir( File::HomeDir::my_home(),".selenium","specs" );
  36. our $method = {};
  37. =head1 SUBROUTINES
  38. =head2 read($type STRING, $nofetch BOOL)
  39. Reads the copy of the provided spec type, and fetches it if a cached version is not available.
  40. =cut
  41. sub read($type='stable', $nofetch=1) {
  42. my $file = File::Spec->catfile( "$dir","$type.json");
  43. fetch( once => $nofetch );
  44. die "could not write $file: $@" unless -f $file;
  45. my $buf = File::Slurper::read_text($file);
  46. my $array = JSON::MaybeXS::decode_json($buf);
  47. my %hash;
  48. @hash{map { $_->{name} } @$array} = @$array;
  49. return \%hash;
  50. }
  51. =head2 fetch(%OPTIONS HASH)
  52. Builds a spec hash based upon the WC3 specification documents, and writes it to disk.
  53. =cut
  54. #TODO needs to grab args and argtypes still
  55. sub fetch (%options) {
  56. $dir = $options{dir} if $options{dir};
  57. my $rc = 0;
  58. foreach my $spec ( sort keys(%spec_urls) ) {
  59. make_path( $dir ) unless -d $dir;
  60. my $file = File::Spec->catfile( "$dir","$spec.json");
  61. my $last_modified = -f $file ? (stat($file))[9] : undef;
  62. if ($options{once} && $last_modified) {
  63. print STDERR "Skipping fetch, using cached result" if $options{verbose};
  64. next;
  65. }
  66. $last_modified = 0 if $options{force};
  67. my $spc = _build_spec($last_modified, %{$spec_urls{$spec}});
  68. if (!$spc) {
  69. print STDERR "Could not retrieve $spec_urls{$spec}{url}, skipping" if $options{verbose};
  70. $rc = 1;
  71. next;
  72. }
  73. # Second clause is for an edge case -- if the header is not set for some bizarre reason we should obey force still
  74. if (ref $spc ne 'ARRAY' && $last_modified) {
  75. print STDERR "Keeping cached result '$file', as page has not changed since last fetch.\n" if $options{verbose};
  76. next;
  77. }
  78. _write_spec($spc, $file);
  79. print "Wrote $file\n" if $options{verbose};
  80. }
  81. return $rc;
  82. }
  83. sub _write_spec ($spec, $file) {
  84. my $spec_json = JSON::MaybeXS::encode_json($spec);
  85. return File::Slurper::write_text($file, $spec_json);
  86. }
  87. sub _build_spec($last_modified, %spec) {
  88. my $page = $browser->get($spec{url});
  89. return unless $page->{success};
  90. if ($page->{headers}{'last-modified'} && $last_modified ) {
  91. my $modified = DateTime::Format::HTTP->parse_datetime($page->{headers}{'last-modified'})->epoch();
  92. return 'cache' if $modified < $last_modified;
  93. }
  94. my $html = $page->{content};
  95. $parse = [];
  96. %state = ( id => $spec{section_id} );
  97. my $parser = HTML::Parser->new(
  98. handlers => {
  99. start => [\&_handle_open, "tagname,attr"],
  100. end => [\&_handle_close, "tagname"],
  101. text => [\&_handle_text, "text"],
  102. }
  103. );
  104. $parser->parse($html);
  105. # Now that we have parsed the methods, let us go ahead and build the argspec based on the anchors for each endpoint.
  106. foreach my $m (@$parse) {
  107. $method = $m;
  108. %state = ();
  109. my $mparser = HTML::Parser->new(
  110. handlers => {
  111. start => [\&_endpoint_open, "tagname,attr"],
  112. end => [\&_endpoint_close, "tagname"],
  113. text => [\&_endpoint_text, "text"],
  114. },
  115. );
  116. $mparser->parse($html);
  117. }
  118. return _fixup(\%spec,$parse);
  119. }
  120. sub _fixup($spec,$parse) {
  121. @$parse = map {
  122. $_->{href} = "$spec->{url}$_->{href}";
  123. #XXX correct TYPO in the spec
  124. $_->{uri} =~ s/{sessionid\)/{sessionid}/g;
  125. @{$_->{output_params}} = grep { $_ ne 'null' } uniq @{$_->{output_params}};
  126. $_
  127. } @$parse;
  128. return $parse;
  129. }
  130. sub _handle_open($tag,$attr) {
  131. if ( $tag eq 'section' && ($attr->{id} || '') eq $state{id} ) {
  132. $state{active} = 1;
  133. return;
  134. }
  135. if ($tag eq 'tr') {
  136. $state{method} = 1;
  137. $state{headers} = [qw{method uri name}];
  138. $state{data} = {};
  139. return;
  140. }
  141. if ($tag eq 'td') {
  142. $state{heading} = shift @{$state{headers}};
  143. return;
  144. }
  145. if ($tag eq 'a' && $state{heading} && $attr->{href}) {
  146. $state{data}{href} = $attr->{href};
  147. }
  148. }
  149. sub _handle_close($tag) {
  150. if ($tag eq 'section') {
  151. $state{active} = 0;
  152. return;
  153. }
  154. if ($tag eq 'tr' && $state{active}) {
  155. if ($state{past_first}) {
  156. push(@$parse, $state{data});
  157. }
  158. $state{past_first} = 1;
  159. $state{method} = 0;
  160. return;
  161. }
  162. }
  163. sub _handle_text($text) {
  164. return unless $state{active} && $state{method} && $state{past_first} && $state{heading};
  165. $text =~ s/\s//gm;
  166. return unless $text;
  167. $state{data}{$state{heading}} .= $text;
  168. }
  169. # Endpoint parsers
  170. sub _endpoint_open($tag,$attr) {
  171. my $id = $method->{href};
  172. $id =~ s/^#//;
  173. if ($attr->{id} && $attr->{id} eq $id) {
  174. $state{active} = 1;
  175. }
  176. if ($tag eq 'ol') {
  177. $state{in_tag} = 1;
  178. }
  179. if ($tag eq 'dt' && $state{in_tag} && $state{last_tag} eq 'dl') {
  180. $state{in_dt} = 1;
  181. }
  182. if ($tag eq 'code' && $state{in_dt} && $state{in_tag} && $state{last_tag} eq 'dt') {
  183. $state{in_code} = 1;
  184. }
  185. $state{last_tag} = $tag;
  186. }
  187. sub _endpoint_close($tag) {
  188. return unless $state{active};
  189. if ($tag eq 'section') {
  190. $state{active} = 0;
  191. $state{in_tag} = 0;
  192. }
  193. if ($tag eq 'ol') {
  194. $state{in_tag} = 0;
  195. }
  196. if ($tag eq 'dt') {
  197. $state{in_dt} = 0;
  198. }
  199. if ($tag eq 'code') {
  200. $state{in_code} = 0;
  201. }
  202. }
  203. sub _endpoint_text($text) {
  204. if ($state{active} && $state{in_tag} && $state{in_code} && $state{in_dt} && $state{last_tag} eq 'code') {
  205. $method->{output_params} //= [];
  206. $text =~ s/\s//gm;
  207. push(@{$method->{output_params}},$text) if $text;
  208. }
  209. }
  210. 1;