Playwright.pm 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. package Playwright;
  2. use strict;
  3. use warnings;
  4. #ABSTRACT: Perl client for Playwright
  5. use 5.006;
  6. use v5.28.0; # Before 5.006, v5.10.0 would not be understood.
  7. use File::pushd;
  8. use File::ShareDir();
  9. use File::Basename();
  10. use Cwd();
  11. use LWP::UserAgent();
  12. use Sub::Install();
  13. use Net::EmptyPort();
  14. use JSON::MaybeXS();
  15. use File::Slurper();
  16. use File::Which();
  17. use Capture::Tiny qw{capture_merged capture_stderr};
  18. use Carp qw{confess};
  19. use Playwright::Base();
  20. use Playwright::Util();
  21. no warnings 'experimental';
  22. use feature qw{signatures};
  23. =head1 SYNOPSIS
  24. use Playwright;
  25. my $handle = Playwright->new();
  26. my $browser = $handle->launch( headless => 0, type => 'chrome' );
  27. my $page = $browser->newPage();
  28. my $res = $page->goto('http://somewebsite.test', { waitUntil => 'networkidle' });
  29. my $frameset = $page->mainFrame();
  30. my $kidframes = $frameset->childFrames();
  31. # Grab us some elements
  32. my $body = $page->select('body');
  33. # You can also get the innerText
  34. my $text = $body->textContent();
  35. $body->click();
  36. $body->screenshot();
  37. my $kids = $body->selectMulti('*');
  38. =head1 DESCRIPTION
  39. Perl interface to a lightweight node.js webserver that proxies commands runnable by Playwright.
  40. Checks and automatically installs a copy of the node dependencies in the local folder if needed.
  41. Currently understands commands you can send to all the playwright classes defined in api.json (installed wherever your OS puts shared files for CPAN distributions).
  42. See L<https://playwright.dev/versions> and drill down into your relevant version (run `npm list playwright` )
  43. for what the classes do, and their usage.
  44. All the classes mentioned there will correspond to a subclass of the Playwright namespace. For example:
  45. # ISA Playwright
  46. my $playwright = Playwright->new();
  47. # ISA Playwright::BrowserContext
  48. my $ctx = $playwright->newContext(...);
  49. # ISA Playwright::Page
  50. my $page = $ctx->newPage(...);
  51. # ISA Playwright::ElementHandle
    my $element = $page->select('body');
  53. See example.pl for a more thoroughly fleshed-out display on how to use this module.
  54. =head3 Why this documentation does not list all available subclasses and their methods
  55. The documentation and names for the subclasses of Playwright follow the spec strictly:
  56. Playwright::BrowserContext => L<https://playwright.dev/docs/api/class-browsercontext>
  57. Playwright::Page => L<https://playwright.dev/docs/api/class-page>
  58. Playwright::ElementHandle => L<https://playwright.dev/docs/api/class-elementhandle>
  59. ...And so on. 100% of the spec is accessible regardless of the Playwright version installed
  60. due to these classes & their methods being built dynamically at run time based on the specification
  61. which is shipped with Playwright itself.
  62. You can check what methods are installed for each subclass by doing the following:
  63. use Data::Dumper;
  64. print Dumper($instance->{spec});
There are two major exceptions in how things work versus the upstream Playwright documentation, detailed below in the C<Selectors> and C<Scripts> sections.
  66. =head2 Selectors
  67. The selector functions have to be renamed from starting with $ for obvious reasons.
  68. The renamed functions are as follows:
  69. =over 4
  70. =item $ => select
  71. =item $$ => selectMulti
  72. =item $eval => eval
  73. =item $$eval => evalMulti
  74. =back
  75. These functions are present as part of the Page, Frame and ElementHandle classes.
  76. =head2 Scripts
  77. The evaluate() and evaluateHandle() functions can only be run in string mode.
  78. To maximize the usefulness of these, I have wrapped the string passed with the following function:
  79. const fun = new Function (toEval);
  80. args = [
  81. fun,
  82. ...args
  83. ];
  84. As such you can effectively treat the script string as a function body.
  85. The same restriction on only being able to pass one arg remains from the upstream:
  86. L<https://playwright.dev/docs/api/class-page#pageevalselector-pagefunction-arg>
  87. You will have to refer to the arguments array as described here:
  88. L<https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/arguments>
  89. =head3 example of evaluate()
  90. # Read the console
  91. $page->on('console',"return [...arguments]");
  92. my $promise = $page->waitForEvent('console');
  93. #TODO This request can race, the server framework I use to host the playwright spec is *not* FIFO (YET)
  94. sleep 1;
  95. $page->evaluate("console.log('hug')");
  96. my $console_log = $handle->await( $promise );
  97. print "Logged to console: '".$console_log->text()."'\n";
  98. =head2 Asynchronous operations
  99. The waitFor* methods defined on various classes will return an instance of L<AsyncData>, a part of the L<Async> module.
  100. You will then need to wait on the result of the backgrounded action with the await() method documented below.
  101. # Assuming $handle is a Playwright object
  102. my $async = $page->waitForEvent('console');
  103. $page->evaluate('console.log("whee")');
  104. my $result = $handle->await( $async );
  105. my $logged = $result->text();
  106. =head1 INSTALLATION NOTE
If you install this module from CPAN, you will likely encounter a fatal error telling you to install node module dependencies.
Follow the instructions and things should be just fine.
If they aren't, please file a bug!
  110. =head1 CONSTRUCTOR
  111. =head2 new(HASH) = (Playwright)
  112. Creates a new browser and returns a handle to interact with it.
  113. =head3 INPUT
  114. debug (BOOL) : Print extra messages from the Playwright server process
  115. timeout (INTEGER) : Seconds to wait for the playwright server to spin up and down. Default: 30s
  116. =cut
# Package-level state shared by every Playwright handle in this process:
#   $spec              - API specification hash fetched from the server
#                        (populated by _check_and_build_spec)
#   $server_bin        - path to the playwright_server script (set by _check_node)
#   $node_bin          - path to the node executable (set by _check_node)
#   %mapper            - type name => coderef that wraps a server response in
#                        the matching Playwright::* object (filled by _build_classes)
#   %methods_to_rename - upstream '$'-prefixed method names => Perl-safe names
our ( $spec, $server_bin, $node_bin, %mapper, %methods_to_rename );
  118. sub _check_node {
  119. # Check that node is installed
  120. $node_bin = File::Which::which('node');
  121. confess("node must exist, be in your PATH and executable") unless -x $node_bin;
  122. my $global_install = '';
  123. my $path2here = File::Basename::dirname( Cwd::abs_path( $INC{'Playwright.pm'} ) );
  124. # Make sure it's possible to start the server
  125. $server_bin = "$path2here/../bin/playwright_server";
  126. if (!-f $server_bin ) {
  127. $server_bin = File::Which::which('playwright_server');
  128. $global_install = 1;
  129. }
  130. confess("Can't locate Playwright server in '$server_bin'!")
  131. unless -f $server_bin;
  132. # Attempt to start the server. If we can't do this, we almost certainly have dependency issues.
  133. my ($output) = capture_merged { system($node_bin, $server_bin, '--check') };
  134. return if $output =~ m/OK/;
  135. # Check for the necessary modules, this relies on package.json
  136. my $npm_bin = File::Which::which('npm');
  137. confess("npm must exist and be executable") unless -x $npm_bin;
  138. # pushd/popd closure
  139. {
  140. my $curdir = pushd(File::Basename::dirname($server_bin));
  141. # Attempt to install deps automatically.
  142. confess("Production install of node dependencies must be done manually by nonroot users. Run the following:\n\n pushd '$curdir' && sudo npm i yargs express playwright uuid; popd\n\n") if $global_install;
  143. my $err = capture_stderr { qx{npm i} };
  144. # XXX apparently doing it 'once more with feeling' fixes issues on windows, lol
  145. $err = capture_stderr { qx{npm i} };
  146. my $exit = $? >> 8;
  147. # Ignore failing for bogus reasons
  148. if ( $err !~ m/package-lock/ ) {
  149. confess("Error installing node dependencies:\n$err") if $exit;
  150. }
  151. }
  152. }
  153. sub _build_classes {
  154. $mapper{mouse} = sub {
  155. my ( $self, $res ) = @_;
  156. return Playwright::Mouse->new(
  157. handle => $self,
  158. id => $res->{_guid},
  159. type => 'Mouse'
  160. );
  161. };
  162. $mapper{keyboard} = sub {
  163. my ( $self, $res ) = @_;
  164. return Playwright::Keyboard->new(
  165. handle => $self,
  166. id => $res->{_guid},
  167. type => 'Keyboard'
  168. );
  169. };
  170. %methods_to_rename = (
  171. '$' => 'select',
  172. '$$' => 'selectMulti',
  173. '$eval' => 'eval',
  174. '$$eval' => 'evalMulti',
  175. );
  176. foreach my $class ( keys(%$spec) ) {
  177. $mapper{$class} = sub {
  178. my ( $self, $res ) = @_;
  179. my $class = "Playwright::$class";
  180. return $class->new(
  181. handle => $self,
  182. id => $res->{_guid},
  183. type => $class
  184. );
  185. };
  186. #All of the Playwright::* Classes are made by this MAGIC
  187. Sub::Install::install_sub(
  188. {
  189. code => sub ( $classname, %options ) {
  190. @class::ISA = qw{Playwright::Base};
  191. $options{type} = $class;
  192. return Playwright::Base::new( $classname, %options );
  193. },
  194. as => 'new',
  195. into => "Playwright::$class",
  196. }
  197. ) unless "Playwright::$class"->can('new');;
  198. # Hack in mouse and keyboard objects for the Page class
  199. if ( $class eq 'Page' ) {
  200. foreach my $hid (qw{keyboard mouse}) {
  201. Sub::Install::install_sub(
  202. {
  203. code => sub {
  204. my $self = shift;
  205. $Playwright::mapper{$hid}->(
  206. $self,
  207. {
  208. _type => $self->{type},
  209. _guid => $self->{guid}
  210. }
  211. ) if exists $Playwright::mapper{$hid};
  212. },
  213. as => $hid,
  214. into => "Playwright::$class",
  215. }
  216. ) unless "Playwright::$class"->can($hid);
  217. }
  218. }
  219. # Install the subroutines if they aren't already
  220. foreach my $method ( ( keys( %{ $spec->{$class}{members} } ), 'on' ) ) {
  221. next if grep { $_ eq $method } qw{keyboard mouse};
  222. my $renamed =
  223. exists $methods_to_rename{$method}
  224. ? $methods_to_rename{$method}
  225. : $method;
  226. Sub::Install::install_sub(
  227. {
  228. code => sub {
  229. my $self = shift;
  230. Playwright::Base::_request(
  231. $self,
  232. args => [@_],
  233. command => $method,
  234. object => $self->{guid},
  235. type => $self->{type}
  236. );
  237. },
  238. as => $renamed,
  239. into => "Playwright::$class",
  240. }
  241. ) unless "Playwright::$class"->can($renamed);
  242. }
  243. }
  244. }
  245. sub BEGIN {
  246. our $SKIP_BEGIN;
  247. _check_node() unless $SKIP_BEGIN;
  248. }
  249. sub new ( $class, %options ) {
  250. #XXX yes, this is a race, so we need retries in _start_server
  251. my $port = Net::EmptyPort::empty_port();
  252. my $timeout = $options{timeout} // 30;
  253. my $self = bless(
  254. {
  255. ua => $options{ua} // LWP::UserAgent->new(),
  256. port => $port,
  257. debug => $options{debug},
  258. pid => _start_server( $port, $timeout, $options{debug} ),
  259. parent => $$,
  260. timeout => $timeout,
  261. },
  262. $class
  263. );
  264. $self->_check_and_build_spec();
  265. _build_classes();
  266. return $self;
  267. }
  268. sub _check_and_build_spec ($self) {
  269. return $spec if ref $spec eq 'HASH';
  270. $spec = Playwright::Util::request(
  271. 'GET', 'spec', $self->{port}, $self->{ua},
  272. );
  273. confess("Could not retrieve Playwright specification. Check that your playwright installation is correct and complete.") unless ref $spec eq 'HASH';
  274. return $spec;
  275. }
  276. =head1 METHODS
  277. =head2 launch(HASH) = Playwright::Browser
  278. The Argument hash here is essentially those you'd see from browserType.launch(). See:
  279. L<https://playwright.dev/docs/api/class-browsertype#browsertypelaunchoptions>
  280. There is an additional "special" argument, that of 'type', which is used to specify what type of browser to use, e.g. 'firefox'.
  281. =cut
  282. sub launch ( $self, %args ) {
  283. Playwright::Base::_coerce(
  284. $spec->{BrowserType}{members},
  285. args => [ \%args ],
  286. command => 'launch'
  287. );
  288. delete $args{command};
  289. my $msg = Playwright::Util::request(
  290. 'POST', 'session', $self->{port}, $self->{ua},
  291. type => delete $args{type},
  292. args => [ \%args ]
  293. );
  294. return $Playwright::mapper{ $msg->{_type} }->( $self, $msg )
  295. if ( ref $msg eq 'HASH' )
  296. && $msg->{_type}
  297. && exists $Playwright::mapper{ $msg->{_type} };
  298. return $msg;
  299. }
  300. =head2 await (AsyncData) = Object
  301. Waits for an asynchronous operation returned by the waitFor* methods to complete and returns the value.
  302. =cut
  303. sub await ( $self, $promise ) {
  304. confess("Input must be an AsyncData") unless $promise->isa('AsyncData');
  305. my $obj = $promise->result(1);
  306. return $obj unless $obj->{_type};
  307. my $class = "Playwright::$obj->{_type}";
  308. return $class->new(
  309. type => $obj->{_type},
  310. id => $obj->{_guid},
  311. handle => $self
  312. );
  313. }
  314. =head2 quit, DESTROY
  315. Terminate the browser session and wait for the Playwright server to terminate.
  316. Automatically called when the Playwright object goes out of scope.
  317. =cut
  318. sub quit ($self) {
  319. # Prevent double destroy after quit()
  320. return if $self->{killed};
  321. # Prevent destructor from firing in child processes so we can do things like async()
  322. # This should also prevent the waitpid below from deadlocking due to two processes waiting on the same pid.
  323. return unless $$ == $self->{parent};
  324. $self->{killed} = 1;
  325. print "Attempting to terminate server process...\n" if $self->{debug};
  326. Playwright::Util::request( 'GET', 'shutdown', $self->{port}, $self->{ua} );
  327. # 0 is always WCONTINUED, 1 is always WNOHANG, and POSIX is an expensive import
  328. # When 0 is returned, the process is still active, so it needs more persuasion
  329. foreach (0..3) {
  330. return unless waitpid( $self->{pid}, 1) == 0;
  331. sleep 1;
  332. }
  333. # Advanced persuasion
  334. print "Forcibly terminating server process...\n" if $self->{debug};
  335. kill('TERM', $self->{pid});
  336. #XXX unfortunately I can't just do a SIGALRM, because blocking system calls can't be intercepted on win32
  337. foreach (0..$self->{timeout}) {
  338. return unless waitpid( $self->{pid}, 1 ) == 0;
  339. sleep 1;
  340. }
  341. warn "Could not shut down playwright server!";
  342. return;
  343. }
  344. sub DESTROY ($self) {
  345. $self->quit();
  346. }
  347. sub _start_server ( $port, $timeout, $debug ) {
  348. $debug = $debug ? '-d' : '';
  349. $ENV{DEBUG} = 'pw:api' if $debug;
  350. my $pid = fork // confess("Could not fork");
  351. if ($pid) {
  352. print "Waiting for port to come up..." if $debug;
  353. Net::EmptyPort::wait_port( $port, $timeout )
  354. or confess("Server never came up after 30s!");
  355. print "done\n" if $debug;
  356. return $pid;
  357. }
  358. exec( $node_bin, $server_bin, "-p", $port, $debug );
  359. }
  360. 1;