Jump to content

Wikipedia:WikiProject Redirect/Inconsistent targets

From Wikipedia, the free encyclopedia

A more frequently updated version of this report is available on Toolforge at https://tb-dev.toolforge.org/ISR/.


Regenerating this report

[edit]
-- First generate a list of applicable titles
DROP TABLE IF EXISTS redir_titles;

CREATE TABLE redir_titles (
      rt_id int(8) unsigned NOT NULL,
      rt_title varchar(255) BINARY NOT NULL,
      rt_crushed varchar(255) BINARY,

      PRIMARY KEY (rt_id)
);


INSERT INTO redir_titles ( rt_id, rt_title )
SELECT /* SLOW_OK */ page_id, page_title
FROM enwiki_p.page
WHERE page_namespace = 0
AND  length( page_title ) >= 10
AND  page_is_redirect = 1;

UPDATE redir_titles
SET  rt_crushed = lcase( rt_title )
WHERE rt_title != lcase( rt_title );

DELETE FROM redir_titles WHERE rt_crushed IS NULL;

ALTER TABLE redir_titles ADD INDEX( rt_crushed );

-- Now find pairs of similar redirects

DROP TABLE IF EXISTS similar_redirects;

CREATE TABLE similar_redirects (
      sr_src_id1 int(8) unsigned NOT NULL,
      sr_src_title1 varchar(255) binary NOT NULL,
	  sr_targ_ns1 int(11),
	  sr_targ_title1 varchar(255) binary,
	  
      sr_src_id2 int(8) unsigned NOT NULL,
      sr_src_title2 varchar(255) binary NOT NULL,
	  sr_targ_ns2 int(11),
	  sr_targ_title2 varchar(255) binary,
	  
      PRIMARY KEY (sr_src_id1,sr_src_id2)
);


-- This will have found most pairs in both directions (A v B / B v A) - eliminate duplicates of this type
-- by checking the A id is always less than the B.

INSERT INTO similar_redirects( sr_src_id1, sr_src_title1, sr_src_id2, sr_src_title2 )
SELECT /* SLOW_OK */ a.rt_id, a.rt_title, b.rt_id, b.rt_title
FROM   redir_titles a
INNER JOIN  redir_titles b ON a.rt_crushed = b.rt_crushed
WHERE a.rt_id < b.rt_id;


-- Fill in the redirect target for each of these and dismiss any with the same target
-- Doing it this way is ugly, but is SQL language neutral.  Go standardisation!

UPDATE similar_redirects
SET sr_targ_ns1 = ( SELECT rd_namespace FROM enwiki_p.redirect WHERE rd_from = sr_src_id1 );

UPDATE similar_redirects
SET sr_targ_title1 = ( SELECT rd_title FROM enwiki_p.redirect WHERE rd_from = sr_src_id1 );

UPDATE similar_redirects
SET sr_targ_ns2 = ( SELECT rd_namespace FROM enwiki_p.redirect WHERE rd_from = sr_src_id2 );

UPDATE similar_redirects
SET sr_targ_title2 = ( SELECT rd_title FROM enwiki_p.redirect WHERE rd_from = sr_src_id2 );

DELETE
FROM similar_redirects
WHERE sr_targ_ns1 = sr_targ_ns2
AND   sr_targ_title1 = sr_targ_title2;

-- See what we have left

SELECT count(*) FROM similar_redirects;

SELECT concat( '* [[' , sr_src_title1, ']] redirects to [[', sr_targ_title1, ']], but [[', sr_src_title2, ']] redirects to [[', sr_targ_title2, ']]' )
FROM similar_redirects
WHERE  sr_targ_ns1 = 0
AND    sr_targ_ns2 = 0
ORDER by sr_src_title1 ASC
LIMIT 200;