Jump to content

Wikipedia:WikiProject Red Link Recovery/Link matching script/Repeated letters

From Wikipedia, the free encyclopedia

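(This deliberately repeats the same SQL statements twice. A single replace() pass only collapses each non-overlapping pair, so 'lll' becomes 'll' and 'llll' also becomes 'll'; the second pass reduces these to 'l', which ensures that repetitions of 3 or 4 letters in a row are handled properly. Runs of 5 or more identical letters are still not fully collapsed.)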

This crushing method often produces unacceptably high numbers of false positives. Additional work must be done to filter the results. Positive hits are more likely in titles with more than 2 words.

-- Remove suggestions that contain fewer than two underscores.
-- The underscore count is the number of characters lost when every
-- underscore is stripped from the suggestion.
-- NOTE(review): presumably underscores separate words, so this drops
-- suggestions of one or two words - confirm against how suggestions is filled.
delete
from suggestions
where 2 > length( suggestion ) - length( replace( suggestion, '_', '' ) );

-- NB - do not crush xx or ii, as these appear in roman numerals
--    - nor ee or oo, as these are just too common in English

-- First pass over crushed_art.title: collapse every doubled letter to a
-- single letter. The letters e, i, o and x are deliberately not in the list.
-- Each replacement targets a different letter and cannot create a new
-- doubled pair of another letter, so nesting the calls in one statement
-- yields the same titles as running them one after another (aa first, zz last).
-- There is no where clause on purpose: the rewrite applies to every row.
update crushed_art
set title =
    replace(replace(replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
        title,
        'aa', 'a'), 'bb', 'b'), 'cc', 'c'), 'dd', 'd'),
        'ff', 'f'), 'gg', 'g'), 'hh', 'h'), 'jj', 'j'),
        'kk', 'k'), 'll', 'l'), 'mm', 'm'), 'nn', 'n'),
        'pp', 'p'), 'qq', 'q'), 'rr', 'r'), 'ss', 's'),
        'tt', 't'), 'uu', 'u'), 'vv', 'v'), 'ww', 'w'),
        'yy', 'y'), 'zz', 'z');

-- First pass over crushed_links.link: collapse every doubled letter to a
-- single letter. The letters e, i, o and x are deliberately not in the list.
-- Each replacement targets a different letter and cannot create a new
-- doubled pair of another letter, so nesting the calls in one statement
-- yields the same links as running them one after another (aa first, zz last).
-- There is no where clause on purpose: the rewrite applies to every row.
update crushed_links
set link =
    replace(replace(replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
        link,
        'aa', 'a'), 'bb', 'b'), 'cc', 'c'), 'dd', 'd'),
        'ff', 'f'), 'gg', 'g'), 'hh', 'h'), 'jj', 'j'),
        'kk', 'k'), 'll', 'l'), 'mm', 'm'), 'nn', 'n'),
        'pp', 'p'), 'qq', 'q'), 'rr', 'r'), 'ss', 's'),
        'tt', 't'), 'uu', 'u'), 'vv', 'v'), 'ww', 'w'),
        'yy', 'y'), 'zz', 'z');

-- Second pass over crushed_art.title, identical to the first pass.
-- One pass only shortens a run of three or four identical letters to two;
-- repeating the replacements reduces such a run to a single letter.
-- The letters e, i, o and x are deliberately not in the list.
-- There is no where clause on purpose: the rewrite applies to every row.
update crushed_art
set title =
    replace(replace(replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
        title,
        'aa', 'a'), 'bb', 'b'), 'cc', 'c'), 'dd', 'd'),
        'ff', 'f'), 'gg', 'g'), 'hh', 'h'), 'jj', 'j'),
        'kk', 'k'), 'll', 'l'), 'mm', 'm'), 'nn', 'n'),
        'pp', 'p'), 'qq', 'q'), 'rr', 'r'), 'ss', 's'),
        'tt', 't'), 'uu', 'u'), 'vv', 'v'), 'ww', 'w'),
        'yy', 'y'), 'zz', 'z');

-- Second pass over crushed_links.link, identical to the first pass.
-- One pass only shortens a run of three or four identical letters to two;
-- repeating the replacements reduces such a run to a single letter.
-- The letters e, i, o and x are deliberately not in the list.
-- There is no where clause on purpose: the rewrite applies to every row.
update crushed_links
set link =
    replace(replace(replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(replace(replace(
    replace(replace(replace(replace(replace(replace(
        link,
        'aa', 'a'), 'bb', 'b'), 'cc', 'c'), 'dd', 'd'),
        'ff', 'f'), 'gg', 'g'), 'hh', 'h'), 'jj', 'j'),
        'kk', 'k'), 'll', 'l'), 'mm', 'm'), 'nn', 'n'),
        'pp', 'p'), 'qq', 'q'), 'rr', 'r'), 'ss', 's'),
        'tt', 't'), 'uu', 'u'), 'vv', 'v'), 'ww', 'w'),
        'yy', 'y'), 'zz', 'z');