Jump to content

User:AnomieBOT/source/d/Redirects.pm

From Wikipedia, the free encyclopedia
package d::Redirects;

use utf8;
use strict;
use AnomieBOT::API;
AnomieBOT::API::load('d::IWNS');

use vars qw/@ISA/;
@ISA=qw/d::IWNS/;

=pod

=head1 NAME

d::Redirects - AnomieBOT redirect functions decorator

=head1 SYNOPSIS

 use AnomieBOT::API;

 $api = new AnomieBOT::API('conf.ini', 1);
 $api->decorators(qw/d::Redirects/);

=head1 DESCRIPTION

C<d::Redirects> contains functions for handling redirects for use by an
AnomieBOT task. When "d::Redirects" is used as a decorator on the API object,
the following methods are available.

=head1 METHODS PROVIDED

=over

=item $api->resolve_redirects( @pages )

Returns a hash mapping each page name in the list to its target (possibly
itself). The returned value is cached for a short time, so repeated calls are
not particularly inefficient.

If an error occurs, returns a 1-element hash mapping the empty string to the
the API error object.

=item $api->apply_redirect_map( $title, $mapping )

Uses the mapping hash to find the target title, correctly detecting loops.

=cut

sub resolve_redirects {
    my ($api, @pages)=@_;
    my $memc = $api->cache;

    my %ret=();
    my @lookup=();
    foreach my $p (@pages) {
        next if $p eq '';
        my $c = $memc->get("\$d::Redirects::resolve_redirects_cache<><<$p>>");
        if(defined($c)){
            $ret{$p}=$c;
        } else {
            push @lookup, $p;
        }
    }

    # Everything cached?
    return %ret unless @lookup;

    my $limit = $api->paramLimit( 'query', 'titles' );
    return $limit if ref($limit);

    my %v=();
    while(@lookup){
        my @p=splice(@lookup,0,$limit);
        my $res=$api->query([],
            titles    => join('|', @p),
            redirects => 1,
        );
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to retrieve redirect list: ".$res->{'error'}."\n");
            return (''=>$res);
        }
        my %map=();
        if(exists($res->{'query'}{'normalized'})){
            $map{$_->{'from'}}=$_->{'to'} foreach @{$res->{'query'}{'normalized'}};
        }
        if(exists($res->{'query'}{'redirects'})){
            $map{$_->{'from'}}=$_->{'to'} foreach @{$res->{'query'}{'redirects'}};
        }
        foreach my $p (@p){
            my $n=$api->apply_redirect_map( $p, \%map );
            $v{$p}=$n;
            $memc->set("\$d::Redirects::resolve_redirects_cache<><<$p>>", $n, 7200);
        }
    }

    foreach my $p (@pages) {
        next if $p eq '';
        next if exists($ret{$p});
        $ret{$p}=$v{$p};
    }

    return %ret;
}

sub apply_redirect_map {
    my ($api, $title, $map) = @_;
    my %seen=( $title => 1 );
    while(exists($map->{$title}) && $map->{$title} ne $title){
        $title = $map->{$title};
        if(exists($seen{$title})){
            $api->warn("Redirect loop involving [[$title]]");
            last;
        }
        $seen{$title}=1;
    }
    return $title;
}

=pod

=item $api->redirects_to( @pages )

Returns a hash mapping each redirect back to the page name, as well as an entry
mapping each page to itself. The returned value is cached for a short time, so
repeated calls are not particularly inefficient.

If an error occurs, returns a 1-element hash mapping the empty string to the
the API error object.

=item $api->redirects_to_resolved( @pages )

This is roughly equivalent to passing the list of pages through
C<< $api->resolve_redirects >> then C<< $api->redirects_to >>. Returns a hash
like the latter.

If an error occurs, returns a 1-element hash mapping the empty string to the
the API error object.

=cut

sub _redirects_to {
    my ($api, $pages, $resolve)=@_;
    my $memc = $api->cache;

    my %ret=();
    my @lookup=();
    foreach my $p (@$pages) {
        next if $p eq '';
        my $c = $memc->get("\$d::Redirects::redirects_to_cache<>${resolve}::<<$p>>");
        if(defined($c)){
            %ret = (%ret, %$c);
        } else {
            push @lookup, $p;
        }
    }

    # Everything cached?
    return %ret unless @lookup;

    my %q = (
        prop    => 'redirects',
        rdlimit => 'max',
        rdprop  => 'title',
    );
    $q{'redirects'} = 1 if $resolve;

    my $limit = $api->paramLimit( 'query', 'titles' );
    return $limit if ref($limit);

    while(@lookup){
        my @p=splice(@lookup,0,$limit);
        my $res=$api->query([], %q, titles => join('|', @p) );
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to resolve redirects: ".$res->{'error'}."\n");
            return (''=>$res);
        }

        my %v = ();
        foreach my $p (values %{$res->{'query'}{'pages'} // {}}) {
            my $t = $p->{'title'};
            $ret{$t} = $t;
            $v{$t}{$t} = $t;
            foreach my $r (@{$p->{'redirects'} // []}) {
                my $r2 = $r->{'title'};
                $ret{$r2} = $t;
                $v{$t}{$r2} = $t;
            }
            $memc->set("\$d::Redirects::redirects_to_cache<>${resolve}::<<$t>>", $v{$t}, 7200);
            if ( $resolve ) {
                foreach my $r (@{$p->{'redirects'} // []}) {
                    my $r2 = $r->{'title'};
                    $memc->set("\$d::Redirects::redirects_to_cache<>${resolve}::<<$r2>>", $v{$t}, 7200);
                }
            }
        }

        my %map=();
        if(exists($res->{'query'}{'normalized'})){
            $map{$_->{'from'}}=$_->{'to'} foreach @{$res->{'query'}{'normalized'}};
        }
        if(exists($res->{'query'}{'redirects'})){
            $map{$_->{'from'}}=$_->{'to'} foreach @{$res->{'query'}{'redirects'}};
        }
        foreach my $p (@p){
            my $n=$api->apply_redirect_map( $p, \%map );
            $v{$n}{$p} = $n;
            $ret{$p} = $n;
            $memc->set("\$d::Redirects::redirects_to_cache<>${resolve}::<<$p>>", $v{$n}, 7200);
        }
    }

    return %ret;
}

sub _redirects_to_aliases {
    my ($api, %ret)=@_;
    my %aliases = $api->namespace_aliases();
    for my $k (keys %ret) {
        next unless $k =~ /^([^:]+):(.+)$/;
        next unless exists( $aliases{$1} );
        for my $p (@{$aliases{$1}}) {
            $ret{"$p:$2"} = $ret{$k};
        }
    }
    return %ret;
}

sub redirects_to {
    my $api = shift;
    return _redirects_to_aliases( $api, _redirects_to( $api, [@_], 0 ) );
}

sub redirects_to_resolved {
    my $api = shift;
    return _redirects_to_aliases( $api, _redirects_to( $api, [@_], 1 ) );
}

=pod

=item $api->flush_redirect_cache()

Clears the caches used by C<resolve_redirects()> and C<redirects_to()>.

=cut

sub flush_redirect_cache {
    my $api=shift;
    $api->cache->flush_prefix('$d::Redirects::resolve_redirects_cache');
    $api->cache->flush_prefix('$d::Redirects::redirects_to_cache');
}

=pod

=item $api->redirect_regex()

Returns a regex that matches the magic at the start of an article that makes it
into a redirect (i.e. the "#REDIRECT").

If an error occurs, returns the API error object.

=cut

sub redirect_regex {
    my $api=shift;

    if(!exists($api->{'$d::Redirects::redirect_regex'})){
        my $redata = $api->cache->get('$d::Redirects::magicdata');
        if(!defined($redata)){
            my $res=$api->query([], meta=>'siteinfo', siprop=>'magicwords');
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to get redirect magic: ".$res->{'error'}."\n");
                return $res;
            }
            my @redir=();
            my $ci='';
            foreach (@{$res->{'query'}{'magicwords'}}){
                next unless $_->{'name'} eq 'redirect';
                @redir=@{$_->{'aliases'}};
                $ci=exists($_->{'case-sensitive'})?'':'i';
            }
            $redata=[$ci, @redir];
            $api->cache->set('$d::Redirects::magicdata', $redata, 7*86400);
        }
        my ($ci, @redir) = @$redata;
        if(@redir){
            my $r=join('|', map "\Q$_\E", @redir);
            $api->{'$d::Redirects::redirect_regex'}=qr/^\s*(?$ci:$r)\s*(?::\s*)?/;
        } else {
            # No redirects supported?
            $api->{'$d::Redirects::redirect_regex'}=qr/(?!)/;
        }
    }
    return $api->{'$d::Redirects::redirect_regex'};
}

1;

=pod

=back

=head1 COPYRIGHT

Copyright 20082019 Anomie

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.