#!/usr/bin/env perl

package Git::CloneEntity;

use strict;
use warnings;

use FindBin::libs;

use List::Util qw{first};
use HTTP::Tiny;
use Config::Simple;
use Getopt::Long qw{GetOptionsFromArray};
use Pod::Usage;
use Pithub;
use Gogs;
use Git;
use Term::ReadKey();
use IO::Interactive::Tiny();

=head1 DESCRIPTION

It is a common pattern in organizations to have their own git resources, but mirror everything public on one of the big platforms with network effect.

It is also a common pattern to need to clone basically everything for a given user/org when new development environments are instantiated.
Alternatively, you may just want to keep your local development environment up to date for said users/projects.

This program facilitates cloning your repositories for given users/orgs from either a local gogs/github instance and configuring pushurls for both it and github, or any other github-api compatible mirror(s).

It will configure your 'origin' & 'upstream' remote to fetch from the baseurl provided, and push to it and the mirror(s) provided.
Regardless, remotes for the base and mirrors will also be set up in case individual pushes must be made.

In the event that two different users/orgs have the same named repository (e.g. forks) it will set up remotes named after the user/org in the event the repo is a fork, and set the 'upstream' name to be the parent repository.
This will not recursively scan for the oldest ancestor as parent; most of the time that's a bad idea.

In the event that all the copies of a repo happen to be a fork on the passed users/orgs, whatever the --primary_user or --primary_org will be preferred.

It will warn you whenever a repository is missing from either, so you can make it go whirr appropriately.

Using this you can easily migrate an organization from being entirely on github to using private resources or vice versa.

=head1 IMPORTANT

This assumes that the repo names between the base and mirrors are identical.

=head1 CONFIG FILE

You will notice below that the options of this tool can be quite involved.
To simplify deploying this tool across your organization, you can place a configuration file (Config::Simple) in ~/.git/clone-entity.cfg.

Example:

    baseurl=https://my-gogs-install.test/api/v1
    nossh=true
    mirrors=https://api.github.com,https://premise-install.github.local/api
    me=jane

Ideally all your users have to do is specify which users/orgs to clone w/mirroring and you should be off to the races.

The name of the setting will be pluralized for any option which may be passed multiple times below.

=head1 USAGE

    git clone-entity --user $user1 --user $user2 --org $org1 --org $org2 --alias $user1:$mirror_domain:$mirrorUser1 --baseurl=https://my.local.install/ [--mirror https://github.com] [--nossh] [--insecure] [--help]

=head1 OPTIONS

=head3 me

Your username on the baseurl. Relevant to token use, what is visible, etc.

In the event your username is also having its repos cloned, your remotes will become 'origin', otherwise the 'primary_user' or 'primary_org' will.

    --me tarzan

=head3 baseurl

URI for your Git management solution. Currently github and gogs are supported.

    --baseurl https://api.github.com
    --baseurl https://gogs.mydomain.test/api/v1

=head3 mirror

URI for a git management solution you wish to use for mirroring the repos at the baseurl. May be passed multiple times.

    --mirror https://on-prem.github.local/api/

=head3 token

Token for a particular baseurl or mirror. Of the format domain:token.

    --token my.domain.test:DEADBEEF

You can omit the auth token on gogs, as we can create them automatically (we will prompt for your password).

=head3 primary_user, primary_org

Primary entity to clone. Consider their repository to be the canonical one. One or the other must be passed. In the event both are, the org is preferred.

In most organizations, you will have the org hold the primary copy of a repo, with developers forking copies. This will become the "upstream" remote.

    --primary_org 'BigHugsLLC'

=head3 user

Clone all of this user's repositories. May be passed multiple times.

    --user fred

=head3 org

Clone all of this organization's repositories. May be passed multiple times.

    --org 'Granite-Industries'

=head3 alias

Map a user/org on your baseurl to a mirror. Of the format base_user:mirror_domain:mirror_user.

Obviously won't work if the mirror is on the same hostname as the baseurl; use a subdomain at the very least.

Also used to alias --me in the case it's different on the primary and mirrors.

    --alias george:sprockets.spacely.local:gjetson

=head3 nossh

Don't use SSH clone URIs. Useful for read-only clones & deployments with no ssh-agent.

    --nossh

=head3 remote

Name of primary remote. By default will be 'origin', but 'all' is popular.

In the event this is not origin, origin will be set to be the push/pull for the repo at the baseurl.

    --remote all

=head1 CONSEQUENTIAL OPTIONS

=head3 insecure

Allow insecure mirrors or baseurls. This is just to prevent footgunning by passing auth over plaintext.

    --insecure

=head3 create

Automatically create a copy of the repo on the mirror if it doesn't exist.

    --create

=head3 private

If --create is passed, also mirror repositories marked as private, preserving privacy.

=head3 sync

Force push all refs onto the mirror(s).

--sync

=cut

# Print usage (generated from this file's POD) and exit.
#   $code - process exit value handed to pod2usage (defaults to 0)
#   $msg  - message printed before the usage text (defaults to "")
#   $cb   - optional coderef run before exiting (used to clean up tokens)
# pod2usage() exits the process when given -exitval, so this does not
# normally return.
sub _help {
    my ($code, $msg, $cb) = @_;
    $code //= 0;
    $msg  //= "";
    $cb->() if ref $cb eq 'CODE';
    return Pod::Usage::pod2usage( -message => $msg, -exitval => $code );
}

# Captures the host part of a URL. Hyphens must be accepted: hosts such as
# my-gogs-install.test would otherwise be truncated at the first '-', which
# breaks token/alias lookup and the primary-domain comparison.
my $domainRipper = qr{^\w+://([\w.\-]+)};
my $verbose;

# Print a progress message, but only when --verbose was requested.
sub LOG {
    my ($msg) = @_;
    print "$msg\n" if $verbose;
    return;
}

# Program entry point.
#   @args - command line arguments (normally @ARGV)
# Merges ~/.git/clone-entity.cfg with the command line, lists repositories on
# the baseurl and every mirror, works out which remotes each repository needs,
# then clones/configures them. Returns 0 on success; usage errors exit through
# _help().
sub main {
    my @args = @_;

    my %options = (
        help         => undef,
        users        => [],
        orgs         => [],
        aliases      => [],
        tokens       => [],
        mirrors      => [],
        baseurl      => "",
        me           => undef,
        create       => undef,
        sync         => undef,
        insecure     => undef,
        nossh        => undef,
        remote       => 'origin',
        primary_user => undef,
        primary_org  => undef,
        verbose      => undef,
    );

    # Allow options to override configuration
    my $home = $ENV{HOME};
    mkdir "$home/.git" unless -d "$home/.git";
    my $config_file = "$home/.git/clone-entity.cfg";
    if (-f $config_file) {
        my $conf = Config::Simple->new($config_file);
        my %config;
        %config = %{$conf->param(-block => 'default')} if $conf;

        # Merge the configuration with the options
        foreach my $opt (keys(%options)) {
            next unless exists $config{$opt};
            if ( ref $options{$opt} eq 'ARRAY' ) {
                # Config::Simple hands back a scalar for single values and an
                # array reference for comma separated ones.
                my @arrayed = ref $config{$opt} eq 'ARRAY' ? @{$config{$opt}} : ($config{$opt});
                push(@{$options{$opt}}, @arrayed);
                next;
            }
            $options{$opt} = $config{$opt};
        }
    }

    # TODO: --create, --sync, --private and --remote are documented but not
    # wired up yet, so they are deliberately not registered here.
    my $parsed_ok = GetOptionsFromArray(\@args,
        'me=s'           => \$options{me},
        'user=s@'        => \$options{users},
        'alias=s@'       => \$options{aliases},
        'token=s@'       => \$options{tokens},
        'org=s@'         => \$options{orgs},
        'baseurl=s'      => \$options{baseurl},
        'mirror=s@'      => \$options{mirrors},
        'insecure'       => \$options{insecure},
        'nossh'          => \$options{nossh},
        'help'           => \$options{help},
        'primary_user=s' => \$options{primary_user},
        'primary_org=s'  => \$options{primary_org},
        'verbose'        => \$options{verbose},
    );
    return _help(1, "Invalid options") unless $parsed_ok;
    $verbose = $options{verbose};

    # Tiebreaker vote in the event of conflicting forks
    push(@{$options{users}}, $options{primary_user}) if $options{primary_user};
    push(@{$options{orgs}},  $options{primary_org})  if $options{primary_org};
    my $prime_name = $options{primary_org} || $options{primary_user};

    return _help() if $options{help};
    return _help(1, "Must pass either primary_user or primary_org") unless $prime_name;
    return _help(1, "Must pass at least one of: user or org") unless (@{$options{users}} + @{$options{orgs}});
    return _help(2, "Must pass baseurl") unless $options{baseurl};
    return _help(3, "Must pass your username as --me") unless $options{me};

    # Parse Alias mappings
    my (%alias_map, %alias_reverse);
    foreach my $arg (@{$options{aliases}}) {
        my ($actual, $domain, $alias) = split(/:/, $arg, 3);
        return _help(3, "aliases must be of the form user:domain:alias") unless $actual && $domain && $alias;
        $alias_map{$domain}{$actual}    = $alias;
        $alias_reverse{$domain}{$alias} = $actual;
    }

    # Parse tokens. The limit keeps a token that itself contains ':' intact.
    my %tokens;
    foreach my $tok (@{$options{tokens}}) {
        my ($domain, $token) = split(/:/, $tok, 2);
        return _help(4, "tokens must be of the form domain:token") unless $domain && $token;
        $tokens{$domain} = $token;
    }

    # Simplify code below by making the primary just another mirror to fetch
    unshift(@{$options{mirrors}}, $options{baseurl});

    my $field_name = $options{nossh} ? 'clone_url' : 'ssh_url';

    my @repos;
    my (%passwords, %clients);
    # Only tokens we created ourselves (i.e. ones we hold a password for) get deleted.
    my $cleanup = sub { _cleanup_tokens( \%clients, \%passwords, $options{insecure} ) if %passwords };

    foreach my $mirror_url (@{$options{mirrors}}) {
        my ($mirror_domain) = $mirror_url =~ $domainRipper;
        _help(3, "Could not figure out the hostname of '$mirror_url'", $cleanup) unless defined $mirror_domain;

        my $server_is_github = _server_is_github($mirror_url);

        my $muser = $options{me};
        $muser = $alias_map{$mirror_domain}{$muser} if exists $alias_map{$mirror_domain}{$muser};

        my %margs = (
            user    => $muser,
            api_uri => $mirror_url,
        );
        $margs{token} = $tokens{$mirror_domain} if $tokens{$mirror_domain};

        my $mirror = $server_is_github ? Pithub->new(%margs) : Gogs->new(%margs);

        # Then it's gogs, and we can just make one.
        if (!$margs{token} && !$server_is_github) {
            _help(5, "Program must be run interactively to auto-create keys on Gogs installs.") unless IO::Interactive::Tiny::is_interactive();

            # Stash the password in case we gotta clean up
            $passwords{$mirror_domain} = _prompt("Please type in the password for ".$mirror->user.":");
            $tokens{$mirror_domain} = $mirror->get_token(
                name     => "git-clone-entity",
                password => $passwords{$mirror_domain},
                insecure => $options{insecure},
            );
            _help(6, "Could not fetch token from gogs!  Check that you supplied the correct username & password.") unless $tokens{$mirror_domain};
            $mirror->token($tokens{$mirror_domain});

            # Stash for later use by cleanup routines if needed
            $clients{$mirror_domain} = $mirror;
        }

        my @fetched = _fetch_all($mirror, $options{users}, $options{orgs}, \%alias_map, $field_name);
        _help(7, "The provided server ($mirror_url) could not list repos!", $cleanup ) unless @fetched;

        # GOGS will list all the repos the user *has access to* not all the ones they own.
        @fetched = grep { $_->{owner}{login} eq $_->{user} } @fetched;
        push(@repos, @fetched);
    }

    my ($primary_domain) = $options{baseurl} =~ $domainRipper;
    my $cloning_myself = first { $_ eq $options{me} } (@{$options{users}}, @{$options{orgs}});

    # repo name => remote name => { fetch => URI, push => [URIs] }
    my %repodata;
    foreach my $repo (@repos) {
        $repodata{$repo->{name}} //= {};

        # Translate a mirror-side owner name back to its name on the baseurl.
        my $reversed = $alias_reverse{$repo->{domain}} // {};
        my $aliased  = exists $reversed->{$repo->{owner}{login}} ? $reversed->{$repo->{owner}{login}} : $repo->{owner}{login};

        my $repo_info = {
            clone_uri         => $repo->{$field_name},
            parent            => $repo->{upstream_uri},
            private           => $repo->{private},
            is_primary_domain => $repo->{domain} eq $primary_domain,
            domain            => $repo->{domain},
            upstream          => $aliased eq $prime_name,
            owner             => $aliased,
            origin            => $aliased eq $options{me} || ( !$cloning_myself && $aliased eq $prime_name ),
        };

        # Set up the "special" URIs
        # NOTE(review): for 'parent' this records the fork's own clone URI,
        # not $repo_info->{parent} -- confirm that is what is intended.
        foreach my $remote (qw{origin upstream parent}) {
            next unless $repo_info->{$remote};
            $repodata{$repo->{name}}{$remote}{fetch} = $repo_info->{clone_uri} if $repo_info->{is_primary_domain};
            $repodata{$repo->{name}}{$remote}{push} //= [];
            push(@{$repodata{$repo->{name}}{$remote}{push}}, $repo_info->{clone_uri});
        }

        # Set up the user's remote
        $repodata{$repo->{name}}{$aliased}{fetch} = $repo_info->{clone_uri} if $repo_info->{is_primary_domain};
        $repodata{$repo->{name}}{$aliased}{push} //= [];
        push(@{$repodata{$repo->{name}}{$aliased}{push}}, $repo_info->{clone_uri});
    }

    use Data::Dumper;
    LOG("Planned remotes:\n" . Dumper(\%repodata));

    # Clean up our temporary tokens exactly once, even when cloning blows up.
    my $cloned_ok   = eval { _clone_repos(%repodata); 1 };
    my $clone_error = $@;
    $cleanup->();
    die $clone_error unless $cloned_ok;

    return 0;
}

# Clone every repository in %repodata into the current directory (unless a
# directory of that name already exists) and (re)create its remotes.
#   %repodata - repo name => remote name => { fetch => URI, push => [URIs] }
# Repositories or remotes without a fetch URI (i.e. with no copy on the
# baseurl) are skipped with a warning. Git failures propagate as exceptions.
sub _clone_repos {
    my (%repodata) = @_;

    foreach my $to_clone (sort keys(%repodata)) {
        my $r = $repodata{$to_clone};

        # Don't clone it if it is already present.
        if (!-d $to_clone) {
            # Look before we leap so that we don't autovivify a bogus 'origin' entry.
            my $clone_uri = $r->{origin} ? $r->{origin}{fetch} : undef;
            if (!defined $clone_uri) {
                warn "Skipping $to_clone: no 'origin' copy of it was found on the baseurl.\n";
                next;
            }
            LOG("Cloning $to_clone...");
            Git::command_oneline([ 'clone', $clone_uri, $to_clone ]);
        }

        LOG("Entering $to_clone...");
        my $repo = Git->repository(Directory => $to_clone);

        # Figure out what the remotes look like
        my %remotes = _parse_remotes(scalar $repo->command(qw{remote -v}));

        # Make sure all the remotes are setup correctly.
        foreach my $rname (sort keys(%$r)) {
            my $remote = $r->{$rname};
            my $fetch  = $remote->{fetch};
            if (!defined $fetch) {
                warn "$to_clone: '$rname' has no copy on the baseurl, not setting up that remote.\n";
                next;
            }

            LOG("Setting up remote $rname...");
            # Removing a remote which does not exist makes git (and so Git.pm) fail.
            $repo->command(qw{remote rm}, $rname) if exists $remotes{$rname};
            $repo->command(qw{remote add}, $rname, $fetch);
            $repo->command(qw{fetch}, $rname);

            my %seen;
            my @push_uris   = grep { defined($_) && !$seen{$_}++ } @{ $remote->{push} // [] };
            my @mirror_uris = grep { $_ ne $fetch } @push_uris;
            next unless @mirror_uris;

            # As soon as one pushurl is configured git stops pushing to the
            # fetch URL, so it has to be listed explicitly alongside the mirrors.
            foreach my $push_uri ($fetch, @mirror_uris) {
                $repo->command(qw{remote set-url --add --push}, $rname, $push_uri);
            }
        }

        # TODO figure out which mirrors are missing, and add them if needed (consider privacy)

        # Finally, sync up the mirrors if instructed.  This is important, as push URIs which aren't in sync will leave git in an inconsistent state.
    }
    return;
}

# Work out where a forked repository was forked from.
#   $mirror     - API client
#   $field_name - which URI field to read ('clone_url' or 'ssh_url')
#   $muser      - owner of the repository on this server
#   $repo       - repository record as returned by the list call
# Returns the parent's URI, or undef when the repository is not a fork.
# Exits through _help() when the lookup fails.
sub _fetch_upstream_uri {
    my ($mirror, $field_name, $muser, $repo) = @_;

    return undef unless $repo->{fork};

    LOG("Looking up what $repo->{name} was forked from...");
    my $details = $mirror->repos->get( user => $muser, repo => $repo->{name} );
    _help(9, "Could not fetch repository details for $repo->{name}") unless $details && $details->response->is_success();

    my $content      = $details->content();
    my $upstream_uri = $content->{parent}{$field_name};
    _help(10, "Could not discern upstream URI for forked repo $repo->{name}!") unless $upstream_uri;

    return $upstream_uri;
}

# Parse the output of `git remote -v`.
#   $raw - the command output as a single string (undef is treated as empty)
# Returns a hash: remote name => { fetch => URI, push => [URIs] }.
# Lines that do not look like "name URI (fetch|push)" are ignored.
sub _parse_remotes {
    my ($raw) = @_;

    my %parsed;
    foreach my $line (split(/\n/, $raw // '')) {
        my ($name, $uri, $type) = $line =~ m/^(\S+)\s+(\S+)\s+\((fetch|push)\)\s*$/;
        next unless defined $type;

        if ($type eq 'fetch') {
            $parsed{$name}{$type} = $uri;
        }
        else {
            $parsed{$name}{$type} //= [];
            push(@{$parsed{$name}{$type}}, $uri);
        }
    }
    return %parsed;
}

# Delete the tokens this run created.
#   $apis      - domain => API client holding the token
#   $passwords - domain => password used to create the token
#   $insecure  - passed through to the client's delete_token()
# Every domain is attempted; dies afterwards naming those that failed.
sub _cleanup_tokens {
    my ( $apis, $passwords, $insecure ) = @_;

    my @failed;
    foreach my $domain (sort keys(%$apis)) {
        my $api    = $apis->{$domain};
        my $result = $api->delete_token( sha1 => $api->token, password => $passwords->{$domain}, insecure => $insecure );
        push(@failed, $domain) unless $result && $result->response->is_success;
    }
    die "Could not clean up token for: ".join(', ', @failed)."\n" if @failed;
    return;
}

# Prompt for a password on the terminal without echoing it.
#   $prompt - text to show (defaults to "")
# Returns the line typed with its newline removed, or undef at end of input.
sub _prompt {
    my ( $prompt ) = @_;
    $prompt //= "";

    print $prompt;

    # We are reading a password
    Term::ReadKey::ReadMode('noecho');
    my $input;
    {
        # Put the terminal back the way we found it if the user bails out.
        local $SIG{'INT'} = sub { Term::ReadKey::ReadMode(0); exit 130; };
        $input = <STDIN>;
    }
    Term::ReadKey::ReadMode(0);
    print "\n";

    chomp($input) if defined $input;
    return $input;
}

# List the repositories of every requested user and org on one server.
#   $api        - API client for the server
#   $users      - array reference of user names (as known on the baseurl)
#   $orgs       - array reference of org names  (as known on the baseurl)
#   $alias_map  - domain => baseurl name => name on that domain
#   $field_name - which URI field to use ('clone_url' or 'ssh_url')
# Returns the list of augmented repository records. The caller's $users and
# $orgs are left untouched so the same lists can be reused for other servers.
sub _fetch_all {
    my ($api, $users, $orgs, $alias_map, $field_name) = @_;

    my ($domain) = $api->api_uri =~ $domainRipper;
    my $aliases = defined $domain ? ( $alias_map->{$domain} // {} ) : {};

    # TODO detect which repo among forks is the "primary" (if one of them is not a fork, use it)
    my @repos;
    foreach my $kind ( [ user => $users ], [ org => $orgs ] ) {
        my ($type, $names) = @$kind;
        foreach my $name (@$names) {
            LOG("Fetching repos for $name...");
            # Copy rather than assign to the loop variable, which aliases the caller's array.
            my $remote_name = exists $aliases->{$name} ? $aliases->{$name} : $name;
            my $result  = $api->repos->list( $type => $remote_name );
            my @fetched = _array_content($result);
            push(@repos, _augment_repos($api, $field_name, $remote_name, $domain, @fetched));
        }
    }
    return @repos;
}

# Unwrap an API result into a list.
# Returns () unless the request succeeded and its content is an array reference.
sub _array_content {
    my ($result) = @_;
    return () unless $result && $result->response->is_success;

    my $content = $result->content();
    return ref $content eq 'ARRAY' ? @$content : ();
}

# Annotate repository records (in place) with where they came from.
#   $mirror, $field_name - passed on to _fetch_upstream_uri()
#   $muser               - the user/org the records were listed for
#   $domain              - host the records were fetched from
#   @fetched             - repository records
# Adds the 'domain', 'upstream_uri' and 'user' keys and returns the records.
sub _augment_repos {
    my ($mirror, $field_name, $muser, $domain, @fetched) = @_;

    foreach my $subj (@fetched) {
        $subj->{domain}       = $domain;
        $subj->{upstream_uri} = _fetch_upstream_uri($mirror, $field_name, $muser, $subj);
        $subj->{user}         = $muser;
    }
    return @fetched;
}

# Guess whether the API at $uri is github (true) or gogs (false).
sub _server_is_github {
    my ($uri) = @_;
    LOG("Figuring out what kind of server $uri is...");

    my $ua  = HTTP::Tiny->new();
    my $res = $ua->get($uri);

    # GOGS will 404 its api baseurl, github will not
    return $res->{success};
}

exit main(@ARGV) unless caller;

1;