User:AnomieBOT/source/d/Redirects.pm
Appearance
See /doc for formatted documentation |
package d::Redirects;
use utf8;
use strict;
use AnomieBOT::API;
AnomieBOT::API::load('d::IWNS');
use vars qw/@ISA/;
@ISA=qw/d::IWNS/;
=pod
=head1 NAME
d::Redirects - AnomieBOT redirect functions decorator
=head1 SYNOPSIS
use AnomieBOT::API;
$api = new AnomieBOT::API('conf.ini', 1);
$api->decorators(qw/d::Redirects/);
=head1 DESCRIPTION
C<d::Redirects> contains functions for handling redirects for use by an
AnomieBOT task. When "d::Redirects" is used as a decorator on the API object,
the following methods are available.
=head1 METHODS PROVIDED
=over
=item $api->resolve_redirects( @pages )
Returns a hash mapping each page name in the list to its target (possibly
itself). The returned value is cached for a short time, so repeated calls are
not particularly inefficient.
If an error occurs, returns a 1-element hash mapping the empty string to the
the API error object.
=item $api->apply_redirect_map( $title, $mapping )
Uses the mapping hash to find the target title, correctly detecting loops.
=cut
sub resolve_redirects {
my ($api, @pages)=@_;
my $memc = $api->cache;
my %ret=();
my @lookup=();
foreach my $p (@pages) {
next if $p eq '';
my $c = $memc->get("\$d::Redirects::resolve_redirects_cache<><<$p>>");
if(defined($c)){
$ret{$p}=$c;
} else {
push @lookup, $p;
}
}
# Everything cached?
return %ret unless @lookup;
my $limit = $api->paramLimit( 'query', 'titles' );
return $limit if ref($limit);
my %v=();
while(@lookup){
my @p=splice(@lookup,0,$limit);
my $res=$api->query([],
titles => join('|', @p),
redirects => 1,
);
if($res->{'code'} ne 'success'){
$api->warn("Failed to retrieve redirect list: ".$res->{'error'}."\n");
return (''=>$res);
}
my %map=();
if(exists($res->{'query'}{'normalized'})){
$map{$_->{'from'}}=$_->{'to'} foreach @{$res->{'query'}{'normalized'}};
}
if(exists($res->{'query'}{'redirects'})){
$map{$_->{'from'}}=$_->{'to'} foreach @{$res->{'query'}{'redirects'}};
}
foreach my $p (@p){
my $n=$api->apply_redirect_map( $p, \%map );
$v{$p}=$n;
$memc->set("\$d::Redirects::resolve_redirects_cache<><<$p>>", $n, 7200);
}
}
foreach my $p (@pages) {
next if $p eq '';
next if exists($ret{$p});
$ret{$p}=$v{$p};
}
return %ret;
}
sub apply_redirect_map {
my ($api, $title, $map) = @_;
my %seen=( $title => 1 );
while(exists($map->{$title}) && $map->{$title} ne $title){
$title = $map->{$title};
if(exists($seen{$title})){
$api->warn("Redirect loop involving [[$title]]");
last;
}
$seen{$title}=1;
}
return $title;
}
=pod
=item $api->redirects_to( @pages )
Returns a hash mapping each redirect back to the page name, as well as an entry
mapping each page to itself. The returned value is cached for a short time, so
repeated calls are not particularly inefficient.
If an error occurs, returns a 1-element hash mapping the empty string to the
the API error object.
=item $api->redirects_to_resolved( @pages )
This is roughly equivalent to passing the list of pages through
C<< $api->resolve_redirects >> then C<< $api->redirects_to >>. Returns a hash
like the latter.
If an error occurs, returns a 1-element hash mapping the empty string to the
the API error object.
=cut
sub _redirects_to {
my ($api, $pages, $resolve)=@_;
my $memc = $api->cache;
my %ret=();
my @lookup=();
foreach my $p (@$pages) {
next if $p eq '';
my $c = $memc->get("\$d::Redirects::redirects_to_cache<>${resolve}::<<$p>>");
if(defined($c)){
%ret = (%ret, %$c);
} else {
push @lookup, $p;
}
}
# Everything cached?
return %ret unless @lookup;
my %q = (
prop => 'redirects',
rdlimit => 'max',
rdprop => 'title',
);
$q{'redirects'} = 1 if $resolve;
my $limit = $api->paramLimit( 'query', 'titles' );
return $limit if ref($limit);
while(@lookup){
my @p=splice(@lookup,0,$limit);
my $res=$api->query([], %q, titles => join('|', @p) );
if($res->{'code'} ne 'success'){
$api->warn("Failed to resolve redirects: ".$res->{'error'}."\n");
return (''=>$res);
}
my %v = ();
foreach my $p (values %{$res->{'query'}{'pages'} // {}}) {
my $t = $p->{'title'};
$ret{$t} = $t;
$v{$t}{$t} = $t;
foreach my $r (@{$p->{'redirects'} // []}) {
my $r2 = $r->{'title'};
$ret{$r2} = $t;
$v{$t}{$r2} = $t;
}
$memc->set("\$d::Redirects::redirects_to_cache<>${resolve}::<<$t>>", $v{$t}, 7200);
if ( $resolve ) {
foreach my $r (@{$p->{'redirects'} // []}) {
my $r2 = $r->{'title'};
$memc->set("\$d::Redirects::redirects_to_cache<>${resolve}::<<$r2>>", $v{$t}, 7200);
}
}
}
my %map=();
if(exists($res->{'query'}{'normalized'})){
$map{$_->{'from'}}=$_->{'to'} foreach @{$res->{'query'}{'normalized'}};
}
if(exists($res->{'query'}{'redirects'})){
$map{$_->{'from'}}=$_->{'to'} foreach @{$res->{'query'}{'redirects'}};
}
foreach my $p (@p){
my $n=$api->apply_redirect_map( $p, \%map );
$v{$n}{$p} = $n;
$ret{$p} = $n;
$memc->set("\$d::Redirects::redirects_to_cache<>${resolve}::<<$p>>", $v{$n}, 7200);
}
}
return %ret;
}
sub _redirects_to_aliases {
my ($api, %ret)=@_;
my %aliases = $api->namespace_aliases();
for my $k (keys %ret) {
next unless $k =~ /^([^:]+):(.+)$/;
next unless exists( $aliases{$1} );
for my $p (@{$aliases{$1}}) {
$ret{"$p:$2"} = $ret{$k};
}
}
return %ret;
}
sub redirects_to {
my $api = shift;
return _redirects_to_aliases( $api, _redirects_to( $api, [@_], 0 ) );
}
sub redirects_to_resolved {
my $api = shift;
return _redirects_to_aliases( $api, _redirects_to( $api, [@_], 1 ) );
}
=pod
=item $api->flush_redirect_cache()
Clears the caches used by C<resolve_redirects()> and C<redirects_to()>.
=cut
sub flush_redirect_cache {
my $api=shift;
$api->cache->flush_prefix('$d::Redirects::resolve_redirects_cache');
$api->cache->flush_prefix('$d::Redirects::redirects_to_cache');
}
=pod
=item $api->redirect_regex()
Returns a regex that matches the magic at the start of an article that makes it
into a redirect (i.e. the "#REDIRECT").
If an error occurs, returns the API error object.
=cut
sub redirect_regex {
my $api=shift;
if(!exists($api->{'$d::Redirects::redirect_regex'})){
my $redata = $api->cache->get('$d::Redirects::magicdata');
if(!defined($redata)){
my $res=$api->query([], meta=>'siteinfo', siprop=>'magicwords');
if($res->{'code'} ne 'success'){
$api->warn("Failed to get redirect magic: ".$res->{'error'}."\n");
return $res;
}
my @redir=();
my $ci='';
foreach (@{$res->{'query'}{'magicwords'}}){
next unless $_->{'name'} eq 'redirect';
@redir=@{$_->{'aliases'}};
$ci=exists($_->{'case-sensitive'})?'':'i';
}
$redata=[$ci, @redir];
$api->cache->set('$d::Redirects::magicdata', $redata, 7*86400);
}
my ($ci, @redir) = @$redata;
if(@redir){
my $r=join('|', map "\Q$_\E", @redir);
$api->{'$d::Redirects::redirect_regex'}=qr/^\s*(?$ci:$r)\s*(?::\s*)?/;
} else {
# No redirects supported?
$api->{'$d::Redirects::redirect_regex'}=qr/(?!)/;
}
}
return $api->{'$d::Redirects::redirect_regex'};
}
1;
=pod
=back
=head1 COPYRIGHT
Copyright 2008–2019 Anomie
This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.