Jump to content

User:AnomieBOT/source/d/IWNS.pm

From Wikipedia, the free encyclopedia
package d::IWNS;

use utf8;
use strict;
use AnomieBOT::API;

use vars qw/@ISA/;
@ISA=qw//;

=pod

=head1 NAME

d::IWNS - AnomieBOT decorator for Interwiki and Namespace mappings

=head1 SYNOPSIS

 use AnomieBOT::API;

 $api = new AnomieBOT::API('conf.ini', 1);
 $api->decorators(qw/d::IWNS/);

=head1 DESCRIPTION

C<d::IWNS> contains functions to load the interwiki, interlanguage, and
namespace mappings. When "d::IWNS" is used as a decorator on the API object,
the following methods are available.

=head1 METHODS PROVIDED

=over

=item $api->load_IWNS_maps()

Loads the data for the functions below from the wiki. Returns a true value on
success, undef on error.

=item $api->interlanguage_map()

Loads the interlanguage prefixes from the wiki. Returns a hash mapping prefixes
to languages, or undef on error.

=item $api->interlanguage_re()

Returns a regex matching all interlanguage prefixes, or undef on error.

=item $api->interwiki_map()

=item $api->interwiki_map( $local_only )

Loads the interwiki prefixes from the wiki. Returns a hash mapping prefixes
to URLs, or undef on error.

=item $api->interwiki_re()

=item $api->interwiki_re( $local_only )

Returns a regex matching all interwiki prefixes, or undef on error.

=item $api->namespace_map()

Loads the namespace prefixes from the wiki. Returns a hash mapping names 
to namespace numbers, or undef on error.

=item $api->namespace_aliases()

Loads the namespace prefixes from the wiki. Returns a hash mapping canonical
names to aliases, or undef on error.

=item $api->namespace_reverse_map()

=item $api->namespace_reverse_map( $all )

Loads the namespace prefixes from the wiki. Returns a hash mapping numbers to
canonical names, or numbers to the array of all names if $all is true, or undef
on error.

=item $api->namespace_re()

=item $api->namespace_re( @ns )

Returns a regex matching all namespace prefixes, or only the namespace prefixes
whose numbers are in C<@ns>. Returns undef on error.

=item $api->namespace_re( '!', @ns )

Returns a regex matching the namespace prefixes whose numbers are not listed in
C<@ns>. Returns undef on error.

=cut

sub load_IWNS_maps {
    my $api=shift;
    my $memc = $api->cache;

    # Increment to flush the cache.
    my $ver = 2;

    my $ret=$memc->get(
        '$d::IWNS::version',
        '$d::IWNS::interlang_map',
        '$d::IWNS::interwiki_map_all',
        '$d::IWNS::interwiki_map_local',
        '$d::IWNS::namespace_map',
        '$d::IWNS::namespace_rmap_canon',
        '$d::IWNS::namespace_rmap_all',
        '$d::IWNS::namespace_aliases',
    );
    return $ret if keys(%$ret) == 8 && $ret->{'$d::IWNS::version'} >= $ver;

    my $res=$api->query([],
        meta   => 'siteinfo',
        siprop => 'interwikimap|namespaces|namespacealiases',
    );
    if($res->{'code'} ne 'success'){
        $api->warn("Failed to retrieve siteinfo: ".$res->{'error'}."\n");
        return undef;
    }

    $ret = {
        '$d::IWNS::version' => $ver,
        '$d::IWNS::interlang_map' => {},
        '$d::IWNS::interwiki_map_all' => {},
        '$d::IWNS::interwiki_map_local' => {},
        '$d::IWNS::namespace_map' => {},
        '$d::IWNS::namespace_rmap_canon' => {},
        '$d::IWNS::namespace_rmap_all' => {},
        '$d::IWNS::namespace_aliases' => {},
    };

    # Namespace aliases override iw prefixes, case insensitively, and enwiki depends on this.
    # This stores the lowercased version of every NS to check when skipping IWs.
    my %nslc=();

    my %il=();
    my %iw1=();
    my %iw2=();
    my %ns1=();
    my %ns2=();
    my %ns3=();
    foreach (values %{$res->{'query'}{'namespaces'}}){
        $nslc{lc($_->{'*'})}=1;
        $ret->{'$d::IWNS::namespace_map'}{$_->{'*'}}=$_->{'id'};
        $ret->{'$d::IWNS::namespace_rmap_canon'}{$_->{'id'}}=$_->{'*'};
        push @{$ret->{'$d::IWNS::namespace_rmap_all'}{$_->{'id'}}}, $_->{'*'};
        if(exists($_->{'canonical'}) && $_->{'canonical'} ne $_->{'*'}){
            $nslc{lc($_->{'canonical'})}=1;
            $ret->{'$d::IWNS::namespace_map'}{$_->{'canonical'}}=$_->{'id'};
            push @{$ret->{'$d::IWNS::namespace_rmap_all'}{$_->{'id'}}}, $_->{'canonical'};
        }
    }
    foreach (@{$res->{'query'}{'namespacealiases'}}){
        $nslc{lc($_->{'*'})}=1;
        $ret->{'$d::IWNS::namespace_map'}{$_->{'*'}}=$_->{'id'};
        push @{$ret->{'$d::IWNS::namespace_rmap_all'}{$_->{'id'}}}, $_->{'*'};
        my $ns = $ret->{'$d::IWNS::namespace_rmap_canon'}{$_->{'id'}};
        push @{$ret->{'$d::IWNS::namespace_aliases'}{$ns}}, $_->{'*'};
    }
    foreach (@{$res->{'query'}{'interwikimap'}}){
        next if exists($nslc{lc($_->{'prefix'})});
        $ret->{'$d::IWNS::interlang_map'}{$_->{'prefix'}}=$_->{'language'} if exists($_->{'language'});
        $ret->{'$d::IWNS::interwiki_map_all'}{$_->{'prefix'}}=$_->{'url'} if exists($_->{'url'});
        $ret->{'$d::IWNS::interwiki_map_local'}{$_->{'prefix'}}=$_->{'url'} if(exists($_->{'url'}) && exists($_->{'local'}));
    }
    $memc->set( $ret, 7*86400 );
    return $ret;
}

sub interlanguage_map {
    my $api=shift;
    my $ret = $api->load_IWNS_maps();
    return $ret ? %{$ret->{'$d::IWNS::interlang_map'}} : undef;
}

sub interwiki_map {
    my $api=shift;
    my $k=($_[0] // 0)?'$d::IWNS::interwiki_map_local':'$d::IWNS::interwiki_map_all';
    my $ret = $api->load_IWNS_maps();
    return $ret ? %{$ret->{$k}} : undef;
}

sub namespace_map {
    my $api=shift;
    my $ret = $api->load_IWNS_maps();
    return $ret ? %{$ret->{'$d::IWNS::namespace_map'}} : undef;
}

sub namespace_reverse_map {
    my $api=shift;
    my $k=($_[0] // 0)?'$d::IWNS::namespace_rmap_all':'$d::IWNS::namespace_rmap_canon';
    my $ret = $api->load_IWNS_maps();
    return $ret ? %{$ret->{$k}} : undef;
}

sub namespace_aliases {
    my $api=shift;
    my $ret = $api->load_IWNS_maps();
    return $ret ? %{$ret->{'$d::IWNS::namespace_aliases'}} : undef;
}

sub interlanguage_re {
    my $api=shift;
    return undef unless $api->load_IWNS_maps();
    my %x=$api->interlanguage_map(@_);
    return qw/(?!)/ unless %x;
    my $re=join('|', map("\Q$_\E", keys %x));
    $re=~s/\\ /[ _]/g;
    return qr/$re/i;
}

sub interwiki_re {
    my $api=shift;
    return undef unless $api->load_IWNS_maps();
    my %x=$api->interwiki_map(@_);
    return qw/(?!)/ unless %x;
    my $re=join('|', map("\Q$_\E", keys %x));
    $re=~s/\\ /[ _]/g;
    return qr/$re/i;
}

sub namespace_re {
    my $api=shift;
    return undef unless $api->load_IWNS_maps();
    my %x=$api->namespace_reverse_map(1);
    my @x=();
    if(@_){
        if($_[0] eq '!'){
            foreach my $k (keys %x){
                push @x, @{$x{$k}} unless grep $_ eq $k, @_;
            }
        } else {
            foreach my $k (@_){
                push @x, @{$x{$k}} if exists($x{$k});
            }
        }
    } else {
        push @x, map @$_, values %x;
    }
    return qr/(?!)/ unless @x;
    my $re=join('|', map("\Q$_\E", @x));
    $re=~s/\\ /[ _]+/g;
    return qr/$re/i;
}

1;

=pod

=back

=head1 COPYRIGHT

Copyright 20102019 Anomie

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.