git-clone-entity 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. #!/usr/bin/env perl
  2. package Git::CloneEntity;
  3. use strict;
  4. use warnings;
  5. use FindBin::libs;
  6. use List::Util qw{first};
  7. use HTTP::Tiny;
  8. use Config::Simple;
  9. use Getopt::Long qw{GetOptionsFromArray};
  10. use Pod::Usage;
  11. use Pithub;
  12. use Gogs;
  13. use Git;
  14. use Term::ReadKey();
  15. use IO::Interactive::Tiny();
  16. =head1 DESCRIPTION
  17. It is a common pattern in organizations to have their own git resources, but mirror everything public on one of the big platforms with network effect.
  18. It is also a common pattern to need to clone basically everything for a given user/org when new development environments are instantiated.
  19. Alternatively, you may just want to keep your local development environment up to date for said users/projects.
  20. This program facilitates cloning your repositories for given users/orgs from either a local gogs/github instance and configuring pushurls for both it and github, or any other github-api compatible mirror(s).
  21. It will configure your 'origin' & 'upstream' remote to fetch from the baseurl provided, and push to it and the mirror(s) provided.
  22. Regardless, remotes for the base and mirrors will also be set up in case individual pushes must be made.
  23. In the event that two different users/orgs have the same named repository (e.g. forks) it
  24. will set up remotes named after the user/org in the event the repo is a fork, and set the 'upstream' name to be the parent repository.
  25. This will not recursively scan for the oldest ancestor as parent; most of the time that's a bad idea.
  26. In the event that all the copies of a repo happen to be a fork on the passed users/orgs,
  27. whatever the --primary_user or --primary_org will be preferred.
  28. It will warn you whenever a repository is missing from either, so you can make it go whirr appropriately.
  29. Using this you can easily migrate an organization from being entirely on github to using private resources or vice versa.
  30. =head1 IMPORTANT
  31. This assumes that the repo names between the base and mirrors are identical.
  32. =head1 CONFIG FILE
  33. You will notice below that the options of this tool can be quite involved.
  34. To simplify deploying this tool across your organization, you can place a configuration file (Config::Simple) in ~/.git/clone-entity.cfg. Example:
  35. baseurl=https://my-gogs-install.test/api/v1
  36. nossh=true
  37. mirrors=https://api.github.com,https://premise-install.github.local/api
  38. me=jane
  39. Ideally all your users have to do is specify which users/orgs to clone w/mirroring and you should be off to the races.
  40. The name of the setting will be pluralized for any option which may be passed multiple times below.
  41. =head1 USAGE
  42. git clone-entity --user $user1 --user $user2 --org $org1 --org $org2 --alias $user1:$mirror_domain:$mirrorUser1 --baseurl=https://my.local.install/ [--mirror https://github.com] [--nossh] [--insecure] [--help]
  43. =head1 OPTIONS
  44. =head3 me
  45. Your username on the baseurl. Relevant to token use, what is visible, etc.
  46. In the event your username is also having its repos cloned, your remotes will become 'origin', otherwise the 'primary_user' or 'primary_org' will.
  47. --me tarzan
  48. =head3 baseurl
  49. URI for your Git management solution. Currently github and gogs are supported.
  50. --baseurl https://api.github.com
  51. --baseurl https://gogs.mydomain.test/api/v1
  52. =head3 mirror
  53. URI for a git management solution you wish to use for mirroring the repos at the baseurl. May be passed multiple times.
  54. --mirror https://on-prem.github.local/api/
  55. =head3 token
  56. Token for a particular baseurl or mirror. Of the format domain:token.
  57. --token my.domain.test:DEADBEEF
  58. You can omit the auth token on gogs, as we can create them automatically (we will prompt for your password).
  59. =head3 primary_user, primary_org
  60. Primary entity to clone. Consider their repository to be the canonical one. One or the other must be passed. In the event both are, the org is preferred.
  61. In most organizations, you will have the org hold the primary copy of a repo, with developers forking copies. This will become the "upstream" remote.
  62. --primary_org 'BigHugsLLC'
  63. =head3 user
  64. Clone all of this user's repositories. May be passed multiple times.
  65. --user fred
  66. =head3 org
  67. Clone all of this organization's repositories. May be passed multiple times.
  68. --org 'Granite-Industries'
  69. =head3 alias
  70. Map a user/org on your baseurl to a mirror. Of the format base_user:mirror_domain:mirror_user.
  71. Obviously won't work if the mirror is on the same hostname as the baseurl; use a subdomain at the very least.
  72. Also used to alias --me in the case it's different on the primary and mirrors.
  73. --alias george:sprockets.spacely.local:gjetson
  74. =head3 nossh
  75. Don't use SSH clone URIs. Useful for read-only clones & deployments with no ssh-agent.
  76. --nossh
  77. =head3 remote
  78. Name of primary remote. By default will be 'origin', but 'all' is popular.
  79. In the event this is not origin, origin will be set to be the push/pull for the repo at the baseurl.
  80. --remote all
  81. =head1 CONSEQUENTIAL OPTIONS
  82. =head3 insecure
  83. Allow insecure mirrors or baseurls. This is just to prevent footgunning by passing auth over plaintext.
  84. --insecure
  85. =head3 create
  86. Automatically create a copy of the repo on the mirror if it doesn't exist.
  87. --create
  88. =head3 private
  89. If --create is passed, also mirror repositories marked as private, preserving privacy.
  90. =head3 sync
  91. Force push all refs onto the mirror(s).
  92. --sync
  93. =cut
  # Show the POD usage text through Pod::Usage.
  #
  #   $code - exit value handed to pod2usage (0 when undefined)
  #   $msg  - message handed to pod2usage (empty string when undefined)
  #   $cb   - optional code reference invoked before the usage text is shown
  #
  # Returns whatever Pod::Usage::pod2usage returns.
  sub _help {
      my ($code, $msg, $cb) = @_;

      $code = 0  unless defined $code;
      $msg  = "" unless defined $msg;

      # Give the caller a chance to tidy up before usage is printed.
      if ( ref($cb) eq 'CODE' ) {
          $cb->();
      }

      return Pod::Usage::pod2usage( -message => $msg, -exitval => $code );
  }
  # Captures the host part of a URI of the form scheme://host[...].
  # Hyphens are accepted because they are legal in hostnames (the documented
  # example baseurl is https://my-gogs-install.test/api/v1); the capture stops
  # at the first character that is not a word character, dot or hyphen, so a
  # ":port" suffix or a path is never included.
  my $domainRipper = qr{^\w+://([\w.-]+)};
  # File-wide verbosity flag; LOG() prints only when this is true.
  my $verbose;
  # Print the given message followed by a newline, but only when the
  # file-level $verbose flag is true.
  sub LOG {
      my ($message) = @_;
      print "$message\n" if $verbose;
  }
  # Program entry point.
  #
  # Merges ~/.git/clone-entity.cfg (when present) with the command-line
  # arguments in @_, lists the repositories of every requested user/org on the
  # baseurl and on each mirror, works out the fetch/push URIs of every remote
  # and hands the result to _clone_repos().
  #
  # Returns 0 on success. Invalid usage is reported through _help(), and a
  # failure inside _clone_repos() is re-thrown after token cleanup.
  sub main {
      my @args = @_;

      my %options = (
          help         => undef,
          users        => [],
          orgs         => [],
          aliases      => [],
          tokens       => [],
          mirrors      => [],
          baseurl      => "",
          me           => undef,
          create       => undef,
          private      => undef,
          sync         => undef,
          insecure     => undef,
          nossh        => undef,
          remote       => 'origin',
          primary_user => undef,
          primary_org  => undef,
          verbose      => undef,
      );

      # Allow options to override configuration.
      # Without HOME there is nowhere to look for the configuration file.
      my $home = $ENV{HOME};
      if ( defined $home && length $home ) {
          mkdir "$home/.git" unless -d "$home/.git";
          my $config_file = "$home/.git/clone-entity.cfg";
          if ( -f $config_file ) {
              my $conf = Config::Simple->new($config_file);
              my %config;
              %config = %{ $conf->param( -block => 'default' ) } if $conf;

              # Merge the configuration with the options
              foreach my $opt ( keys %options ) {
                  next unless exists $config{$opt};
                  if ( ref( $options{$opt} ) eq 'ARRAY' ) {
                      my @arrayed = ref( $config{$opt} ) eq 'ARRAY' ? @{ $config{$opt} } : ( $config{$opt} );
                      push @{ $options{$opt} }, @arrayed;
                      next;
                  }
                  $options{$opt} = $config{$opt};
              }
          }
      }

      # remote/create/private/sync are documented options; they are accepted
      # here so passing them is not an error, even though nothing in this
      # function acts on them yet.
      my $parsed_ok = GetOptionsFromArray(
          \@args,
          'me=s'           => \$options{me},
          'user=s@'        => \$options{users},
          'alias=s@'       => \$options{aliases},
          'token=s@'       => \$options{tokens},
          'org=s@'         => \$options{orgs},
          'baseurl=s'      => \$options{baseurl},
          'mirror=s@'      => \$options{mirrors},
          'remote=s'       => \$options{remote},
          'create'         => \$options{create},
          'private'        => \$options{private},
          'sync'           => \$options{sync},
          'insecure'       => \$options{insecure},
          'nossh'          => \$options{nossh},
          'help'           => \$options{help},
          'primary_user=s' => \$options{primary_user},
          'primary_org=s'  => \$options{primary_org},
          'verbose'        => \$options{verbose},
      );
      $verbose = $options{verbose};

      # Tiebreaker vote in the event of conflicting forks
      push @{ $options{users} }, $options{primary_user} if $options{primary_user};
      push @{ $options{orgs} },  $options{primary_org}  if $options{primary_org};
      my $prime_name = $options{primary_org} || $options{primary_user};

      return _help() if $options{help};
      return _help( 1, "Could not parse the supplied options" ) unless $parsed_ok;
      return _help( 1, "Must pass either primary_user or primary_org" ) unless $prime_name;
      return _help( 1, "Must pass at least one of: user or org" ) unless ( @{ $options{users} } + @{ $options{orgs} } );
      return _help( 2, "Must pass baseurl" ) unless $options{baseurl};
      return _help( 3, "Must pass your username as --me" ) unless $options{me};

      # Parse Alias mappings
      my ( %alias_map, %alias_reverse );
      foreach my $arg ( @{ $options{aliases} } ) {
          my ( $actual, $domain, $alias ) = split( /:/, $arg );
          return _help( 3, "aliases must be of the form user:domain:alias" ) unless $actual && $domain && $alias;
          $alias_map{$domain}{$actual}    = $alias;
          $alias_reverse{$domain}{$alias} = $actual;
      }

      # Parse tokens. Only the first colon separates domain from token, so a
      # token that itself contains ':' is kept whole.
      my %tokens;
      foreach my $tok ( @{ $options{tokens} } ) {
          my ( $domain, $token ) = split( /:/, $tok, 2 );
          return _help( 4, "tokens must be of the form domain:token" ) unless $domain && $token;
          $tokens{$domain} = $token;
      }

      # Simplify code below by making the primary just another mirror to fetch
      unshift @{ $options{mirrors} }, $options{baseurl};

      my $field_name = $options{nossh} ? 'clone_url' : 'ssh_url';
      my @repos;
      my ( %passwords, %clients );

      # Only tokens created below (those with a stashed password) need removal.
      my $cleanup = sub { _cleanup_tokens( \%clients, \%passwords, $options{insecure} ) if %passwords };

      foreach my $mirror_url ( @{ $options{mirrors} } ) {
          my $server_is_github = _server_is_github($mirror_url);
          my ($mirror_domain) = $mirror_url =~ $domainRipper;

          my $muser = $options{me};
          $muser = $alias_map{$mirror_domain}{$muser} if exists $alias_map{$mirror_domain}{$muser};

          my %margs = (
              user    => $muser,
              api_uri => $mirror_url,
          );
          $margs{token} = $tokens{$mirror_domain} if $tokens{$mirror_domain};

          my $mirror = $server_is_github ? Pithub->new(%margs) : Gogs->new(%margs);

          # Then it's gogs, and we can just make one.
          if ( !$margs{token} && !$server_is_github ) {
              _help( 5, "Program must be run interactively to auto-create keys on Gogs installs." ) unless IO::Interactive::Tiny::is_interactive();

              # Stash the password in case we gotta clean up
              $passwords{$mirror_domain} = _prompt( "Please type in the password for " . $mirror->user . ":" );
              $tokens{$mirror_domain}    = $mirror->get_token(
                  name     => "git-clone-entity",
                  password => $passwords{$mirror_domain},
                  insecure => $options{insecure},
              );
              _help( 6, "Could not fetch token from gogs! Check that you supplied the correct username & password." ) unless $tokens{$mirror_domain};
              $mirror->token( $tokens{$mirror_domain} );

              # Stash for later use by cleanup routines if needed
              $clients{$mirror_domain} = $mirror;
          }

          my @fetched = _fetch_all( $mirror, $options{users}, $options{orgs}, \%alias_map, $field_name );
          _help( 7, "The provided server ($mirror_url) could not list repos!", $cleanup ) unless @fetched;

          # GOGS will list all the repos the user *has access to* not all the ones they own.
          @fetched = grep { $_->{owner}{login} eq $_->{user} } @fetched;
          push @repos, @fetched;
      }

      my ($primary_domain) = $options{baseurl} =~ $domainRipper;
      my $cloning_myself = first { $_ eq $options{me} } ( @{ $options{users} }, @{ $options{orgs} } );

      # repo name => remote name => { fetch => URI, push => [URIs] }
      my %repodata;
      foreach my $repo (@repos) {
          $repodata{ $repo->{name} } //= {};

          # Translate the owner's name on this server back to its name on the baseurl.
          my $reversed = $alias_reverse{ $repo->{domain} } // {};
          my $aliased  = exists $reversed->{ $repo->{owner}{login} } ? $reversed->{ $repo->{owner}{login} } : $repo->{owner}{login};

          my $repo_info = {
              clone_uri         => $repo->{$field_name},
              parent            => $repo->{upstream_uri},
              private           => $repo->{private},
              is_primary_domain => $repo->{domain} eq $primary_domain,
              domain            => $repo->{domain},
              upstream          => $aliased eq $prime_name,
              owner             => $aliased,
              origin            => $aliased eq $options{me} || ( !$cloning_myself && $aliased eq $prime_name ),
          };

          # Set up the "special" URIs
          foreach my $remote (qw{origin upstream parent}) {
              next unless $repo_info->{$remote};
              $repodata{ $repo->{name} }{$remote}{fetch} = $repo_info->{clone_uri} if $repo_info->{is_primary_domain};
              $repodata{ $repo->{name} }{$remote}{push} //= [];
              push @{ $repodata{ $repo->{name} }{$remote}{push} }, $repo_info->{clone_uri};
          }

          # Set up the user's remote
          $repodata{ $repo->{name} }{$aliased}{fetch} = $repo_info->{clone_uri} if $repo_info->{is_primary_domain};
          $repodata{ $repo->{name} }{$aliased}{push} //= [];
          push @{ $repodata{ $repo->{name} }{$aliased}{push} }, $repo_info->{clone_uri};
      }

      # Clone and configure. The cleanup must run even when cloning dies, so the
      # error is captured and re-thrown afterwards.
      my $cloned      = eval { _clone_repos(%repodata); 1 };
      my $clone_error = $@;

      # Clean up
      $cleanup->();
      die $clone_error unless $cloned;

      return 0;
  }
  # Clone every repository described in %repodata into the current directory
  # (unless a directory of that name already exists) and (re)configure its
  # remotes.
  #
  #   %repodata - repo name => { remote name => { fetch => URI, push => [URIs] } }
  #
  # Repositories or remotes without a fetch URI are skipped with a warning
  # rather than handing an undefined argument to git.
  sub _clone_repos {
      my (%repodata) = @_;

      foreach my $to_clone ( sort keys %repodata ) {
          my $r = $repodata{$to_clone};

          # Don't clone it if it is already present.
          if ( !-d $to_clone ) {
              # Look before dereferencing so a missing origin is not autovivified
              # into an empty remote that the loop below would then try to add.
              my $origin_uri = $r->{origin} ? $r->{origin}{fetch} : undef;
              unless ($origin_uri) {
                  warn "No origin fetch URI known for $to_clone, skipping.\n";
                  next;
              }
              LOG("Cloning $to_clone...");
              Git::command_oneline( [ 'clone', $origin_uri ] );
          }

          LOG("Entering $to_clone...");
          my $repo = Git->repository( Directory => $to_clone );

          # Figure out what the remotes look like
          my $listing = $repo->command(qw{remote -v});
          my %remotes = _parse_remotes($listing);

          # Make sure all the remotes are setup correctly.
          foreach my $rname ( sort keys %$r ) {
              my $remote = $r->{$rname};
              unless ( $remote->{fetch} ) {
                  warn "No fetch URI known for remote $rname of $to_clone, skipping.\n";
                  next;
              }

              LOG("Setting up remote $rname...");

              # Only remove remotes that exist, so a missing one is never passed to 'remote rm'.
              $repo->command( qw{remote rm}, $rname ) if exists $remotes{$rname};
              $repo->command( qw{remote add}, $rname, $remote->{fetch} );
              $repo->command( qw{fetch}, $rname );

              # Once a remote has any explicit push URL, git pushes only to the
              # explicit ones. To keep pushing to the fetch URI as well as the
              # mirrors it has to be registered as a push URL too.
              my @mirror_uris = grep { $_ ne $remote->{fetch} } @{ $remote->{push} || [] };
              next unless @mirror_uris;
              foreach my $push_uri ( $remote->{fetch}, @mirror_uris ) {
                  $repo->command( qw{remote set-url --add --push}, $rname, $push_uri );
              }
          }

          # TODO figure out which mirrors are missing, and add them if needed (consider privacy)
          # Finally, sync up the mirrors if instructed. This is important, as push URIs which aren't in sync will leave git in an inconsistent state.
      }
      return;
  }
  # Look up the URI a forked repository was forked from.
  #
  #   $mirror     - API client; must provide repos->get(user => ..., repo => ...)
  #   $field_name - key of the clone URI to read from the parent record
  #   $muser      - owner of the repository on that server
  #   $repo       - repository record (hash reference) as listed by the API
  #
  # Returns undef when the record is not flagged as a fork, otherwise the
  # parent's URI. Lookup problems are reported through _help().
  sub _fetch_upstream_uri {
      my ($mirror, $field_name, $muser, $repo) = @_;

      my $upstream_uri;
      return $upstream_uri unless $repo->{fork};

      LOG("Looking up what $repo->{name} was forked from...");
      my $details   = $mirror->repos->get( user => $muser, repo => $repo->{name} );
      my $lookup_ok = $details && $details->response->is_success();
      _help(9, "Could not fetch repository details for $repo->{name}") unless $lookup_ok;

      $upstream_uri = $details->content()->{parent}{$field_name};
      _help(10, "Could not discern upstream URI for forked repo $repo->{name}!") unless $upstream_uri;

      return $upstream_uri;
  }
  # Parse the text printed by `git remote -v`.
  #
  #   $raw - the listing, one "name<whitespace>uri (type)" entry per line
  #
  # Returns a hash of remote name => { fetch => URI, <other type> => [URIs] }.
  # Blank lines and lines that do not look like a remote entry are ignored, and
  # undefined input yields an empty hash.
  sub _parse_remotes {
      my ($raw) = @_;
      my %parsed;
      return %parsed unless defined $raw;

      foreach my $line ( split /\n/, $raw ) {
          # Skip anything unparseable instead of filing it under an empty key.
          next unless $line =~ m/^(\S+)\s+(.+?)\s+\(([^()]+)\)\s*$/;
          my ( $name, $uri, $type ) = ( $1, $2, $3 );

          if ( $type eq 'fetch' ) {
              $parsed{$name}{$type} = $uri;
          }
          else {
              push @{ $parsed{$name}{$type} }, $uri;
          }
      }
      return %parsed;
  }
  # Delete the API tokens this program created.
  #
  #   $apis      - hash reference of domain => API client (must provide token
  #                and delete_token)
  #   $passwords - hash reference of domain => password used to create the token
  #   $insecure  - passed through to delete_token
  #
  # Every domain is attempted even when an earlier deletion fails, so one bad
  # server does not leave the other tokens behind. Dies afterwards, naming the
  # domains whose token could not be removed.
  sub _cleanup_tokens {
      my ( $apis, $passwords, $insecure ) = @_;

      my @failed;
      foreach my $domain ( sort keys %$apis ) {
          my $api    = $apis->{$domain};
          my $result = eval {
              $api->delete_token( sha1 => $api->token, password => $passwords->{$domain}, insecure => $insecure );
          };
          push @failed, $domain unless $result && $result->response->is_success;
      }

      die "Could not clean up token for: " . join( ', ', @failed ) . "\n" if @failed;
      return;
  }
  # Print $prompt and read one line from STDIN with terminal echo switched
  # off, as the input is a password.
  #
  # Echo is restored before returning, and also if the read is interrupted by
  # SIGINT (in which case the program exits with status 130).
  # Returns the line without its trailing newline.
  sub _prompt {
      my ($prompt) = @_;
      $prompt = "" unless $prompt;
      print $prompt;

      # We are reading a password
      Term::ReadKey::ReadMode('noecho');
      my $input = do {
          local $SIG{'INT'} = sub { Term::ReadKey::ReadMode(0); exit 130; };
          my $line = <STDIN>;
          chomp($line) if $line;
          $line;
      };
      Term::ReadKey::ReadMode(0);

      print "\n";
      return $input;
  }
  # List the repositories of every requested user and organization on one server.
  #
  #   $api        - API client; must provide api_uri and repos->list
  #   $users      - array reference of user names (as known on the baseurl)
  #   $orgs       - array reference of organization names (as known on the baseurl)
  #   $alias_map  - hash reference of domain => { base name => name on that domain }
  #   $field_name - clone URI field, passed on to _augment_repos
  #
  # Returns the augmented repository records, users first, then orgs.
  # The caller's $users and $orgs arrays are left untouched: the alias lookup
  # works on a copy of each name, because a foreach loop variable aliases the
  # array element and assigning to it would rewrite the caller's list.
  sub _fetch_all {
      my ($api, $users, $orgs, $alias_map, $field_name) = @_;
      my ($domain) = $api->api_uri =~ $domainRipper;
      my $aliases = ( defined $domain && $alias_map->{$domain} ) || {};

      # TODO detect which repo among forks is the "primary" (if one of them is not a fork, use it)
      my @repos;
      my @wanted = (
          ( map { [ user => $_ ] } @$users ),
          ( map { [ org  => $_ ] } @$orgs ),
      );
      foreach my $entity (@wanted) {
          my ( $kind, $name ) = @$entity;
          LOG("Fetching repos for $name...");

          my $remote_name = exists $aliases->{$name} ? $aliases->{$name} : $name;
          my $result      = $api->repos->list( $kind => $remote_name );
          my @fetched     = _array_content($result);
          @fetched = _augment_repos( $api, $field_name, $remote_name, $domain, @fetched );
          push @repos, @fetched;
      }
      return @repos;
  }
  # Unpack the payload of an API result as a list.
  #
  # Returns the elements of $result->content() when the response succeeded and
  # the content is an array reference; returns an empty list otherwise.
  sub _array_content {
      my ($result) = @_;

      my $usable = $result && $result->response->is_success;
      return () unless $usable;

      return ref( $result->content() ) eq 'ARRAY' ? @{ $result->content() } : ();
  }
  # Annotate each repository record in place with where it came from.
  #
  #   $mirror     - API client, passed on to _fetch_upstream_uri
  #   $field_name - clone URI field, passed on to _fetch_upstream_uri
  #   $muser      - user/org the records were listed for
  #   $domain     - domain of the server the records came from
  #   @fetched    - repository records (hash references)
  #
  # Sets the domain, upstream_uri and user keys on every record and returns
  # the same records.
  sub _augment_repos {
      my ($mirror, $field_name, $muser, $domain, @fetched) = @_;

      foreach my $record (@fetched) {
          $record->{domain}       = $domain;
          $record->{upstream_uri} = _fetch_upstream_uri($mirror, $field_name, $muser, $record);
          $record->{user}         = $muser;
      }

      return @fetched;
  }
  # Guess whether the server at $uri speaks the github API by issuing a GET
  # against the URI itself.
  #
  # Returns the 'success' field of the HTTP::Tiny response: true is taken to
  # mean github, false to mean Gogs.
  sub _server_is_github {
      my ($uri) = @_;
      LOG("Figuring out what kind of server $uri is...");

      # GOGS will 404 its api baseurl, github will not
      my $response = HTTP::Tiny->new()->get($uri);
      return $response->{success};
  }
  # Run main() only when executed as a script; when the file is loaded by
  # other code (caller is true) it just defines the package.
  unless (caller) {
      exit main(@ARGV);
  }

  # A loaded file must end with a true value.
  1;