User:Ed Poor/hiragana.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page.
Documentation for this user script can be added at User:Ed Poor/hiragana.
// Converts Hiragana text to Romaji
// Includes an optional self-test
// Self-test failure flag: assertEquals() sets it to true when a comparison
// fails, and test() prints "Okay" only while it is still false.
var bugs = false;
/**
 * Optional self-test.
 *
 * Runs every sample through its converter and compares the outcome with the
 * expected romaji via assertEquals() (which throws on the first mismatch).
 * Prints "Okay" through debugPrint() when the `bugs` flag is still unset
 * afterwards.
 */
function test() {
  // Each row: [converter, kana input, expected romaji]. Order matters because
  // the first failing row aborts the run.
  var samples = [
    [kanaToRomaji, 'か', 'ka'],
    [kanaToRomaji, 'と', 'to'],
    [hiraganaToRomaji, 'ちば', 'Chiba'],
    [hiraganaToRomaji, 'まって', 'matte'],
    [hiraganaToRomaji, 'いっぱい', 'ippai'],
    [hiraganaToRomaji, 'いっ', 'iっ'],
    [hiraganaToRomaji, 'きょこ', 'Kyoko'],
    [hiraganaToRomaji, 'しゃい', 'shai'],
    [hiraganaToRomaji, 'いらっしゃい', 'irasshai'], // flagged "buggy" by the original author
    [hiraganaToRomaji, 'まっちがった', 'matchigatta'] // flagged "buggy" by the original author
  ];

  for (var row = 0; row < samples.length; row++) {
    var convert = samples[row][0];
    var input = samples[row][1];
    var expected = samples[row][2];
    assertEquals(convert(input), expected);
  }

  if (bugs) {
    return;
  }
  debugPrint("Okay");
}
/**
 * Converts a run of hiragana into romaji.
 *
 * The text is scanned left to right. At every position one of three things
 * happens:
 *   - small tsu followed by an accepted kana (see isSokuonPair): emit only the
 *     first romaji letter of the following kana and advance by one, so the
 *     following kana is converted on the next pass (T + TE, P + PA, ...);
 *   - a kana ending in "i" followed by a small ya/yu/yo (see isComboSyllable):
 *     emit the leader's first letter plus the follower's romaji and advance
 *     by two;
 *   - otherwise: emit the single-kana conversion and advance by one.
 * Characters without a conversion pass through unchanged.
 *
 * @param {string} kanaText - text to convert
 * @returns {string} the converted text
 */
function hiraganaToRomaji(kanaText) {
  var output = "";
  var cursor = 0;

  while (cursor < kanaText.length) {
    var current = kanaText.charAt(cursor);
    var next = kanaText.charAt(cursor + 1); // '' when current is the last character
    var step = 1;
    var piece;

    if (isSokuonPair(current, next)) {
      // Sokuon: only the doubled consonant is produced here.
      piece = getSokuonConsonant(next);
    } else if (isComboSyllable(current, next)) {
      piece = kanaToRomaji(current).charAt(0) + kanaToRomaji(next);
      step = 2;
    } else {
      piece = kanaToRomaji(current);
    }

    // Drop this call if Hepburn romanization is not wanted.
    output += kanaToHepburn(piece);
    cursor += step;
  }

  return output;
}
/**
 * Tells whether two consecutive kana form a combined syllable, i.e. a kana
 * whose romaji ends in "i" followed by a small ya, yu or yo.
 *
 * @param {string} leader - the kana at the current position
 * @param {string} follower - the next character, or '' at the end of the text
 * @returns {boolean} true when the pair should be converted as one syllable
 */
function isComboSyllable(leader, follower) {
  // At the end of the text the caller passes '' (charAt past the end). An
  // empty string is "found" by indexOf-based membership tests, which used to
  // make a trailing き/し/ち/... look like a combo and lose its vowel
  // ('かき' came out as 'kak'). No follower means no combo.
  if (!follower) return false;

  var leaderRomaji = kanaToRomaji(leader);
  var leaderVowel = leaderRomaji.charAt(leaderRomaji.length - 1);
  if (leaderVowel !== 'i') return false;

  return isSmallYaYuYo(follower);
}
/**
 * Tells whether the argument is exactly one of the small kana ゃ, ゅ, ょ.
 *
 * @param {string} kana - a single character (or '' at the end of the text)
 * @returns {boolean} true only for 'ゃ', 'ゅ' or 'ょ'
 */
function isSmallYaYuYo(kana) {
  // String.prototype.indexOf('') is 0, so an empty string would otherwise be
  // reported as a small kana; insist on exactly one character.
  if (typeof kana !== 'string' || kana.length !== 1) return false;
  return 'ゃゅょ'.indexOf(kana) !== -1;
}
/**
 * Tells whether `leader` is a small tsu that doubles the consonant of
 * `follower`.
 *
 * Only followers whose romaji starts with k, t, s or p are accepted.
 *
 * @param {string} leader - the kana at the current position
 * @param {string} follower - the next character, or '' at the end of the text
 * @returns {boolean} true for an accepted small-tsu pair
 */
function isSokuonPair(leader, follower) {
  if (leader == 'っ' && follower.length != 0) {
    var onset = kanaToRomaji(follower).charAt(0);
    return 'ktsp'.indexOf(onset) >= 0;
  }
  return false;
}
/**
 * Returns the romaji of `kana` with its first letter doubled,
 * e.g. the romaji "te" becomes "tte".
 *
 * @param {string} kana - the kana that follows a small tsu
 * @returns {string} first romaji letter + full romaji
 */
function getSokuonSyllable(kana) {
  var syllable = kanaToRomaji(kana);
  return syllable.charAt(0) + syllable;
}
/**
 * Returns the first letter of the romaji for `kana`; this is the letter a
 * preceding small tsu contributes to the output.
 *
 * @param {string} kana - the kana that follows a small tsu
 * @returns {string} first character of the kana's romaji
 */
function getSokuonConsonant(kana) {
  return kanaToRomaji(kana).charAt(0);
}
// Convert one kana to its plain romaji equivalent
/**
 * Looks up a single kana and returns its plain (pre-Hepburn) romaji, e.g.
 * し -> "si", ち -> "ti". Anything that is not in the table is returned
 * unchanged so callers can pass arbitrary characters through.
 *
 * The small tsu (っ) is deliberately absent: it is handled separately as a
 * sokuon by the caller.
 *
 * @param {string} text - one kana character
 * @returns {string} the romaji for that kana, or `text` itself when unknown
 */
function kanaToRomaji(text) {
  var romajiByKana = {
    // vowels (small and regular)
    'ぁ': 'a', 'あ': 'a', 'ぃ': 'i', 'い': 'i', 'ぅ': 'u', 'う': 'u',
    'ぇ': 'e', 'え': 'e', 'ぉ': 'o', 'お': 'o',
    // k / g
    'か': 'ka', 'が': 'ga', 'き': 'ki', 'ぎ': 'gi', 'く': 'ku', 'ぐ': 'gu',
    'け': 'ke', 'げ': 'ge', 'こ': 'ko', 'ご': 'go',
    // s / z
    'さ': 'sa', 'ざ': 'za', 'し': 'si', 'じ': 'zi', 'す': 'su', 'ず': 'zu',
    'せ': 'se', 'ぜ': 'ze', 'そ': 'so', 'ぞ': 'zo',
    // t / d
    'た': 'ta', 'だ': 'da', 'ち': 'ti', 'ぢ': 'di', 'つ': 'tu', 'づ': 'du',
    'て': 'te', 'で': 'de', 'と': 'to', 'ど': 'do',
    // n
    'な': 'na', 'に': 'ni', 'ぬ': 'nu', 'ね': 'ne', 'の': 'no',
    // h / b / p
    'は': 'ha', 'ば': 'ba', 'ぱ': 'pa', 'ひ': 'hi', 'び': 'bi', 'ぴ': 'pi',
    'ふ': 'hu', 'ぶ': 'bu', 'ぷ': 'pu', 'へ': 'he', 'べ': 'be', 'ぺ': 'pe',
    'ほ': 'ho', 'ぼ': 'bo', 'ぽ': 'po',
    // m
    'ま': 'ma', 'み': 'mi', 'む': 'mu', 'め': 'me', 'も': 'mo',
    // y (small and regular)
    'ゃ': 'ya', 'や': 'ya', 'ゅ': 'yu', 'ゆ': 'yu', 'ょ': 'yo', 'よ': 'yo',
    // r
    'ら': 'ra', 'り': 'ri', 'る': 'ru', 'れ': 're', 'ろ': 'ro',
    // w
    'ゎ': 'wa', 'わ': 'wa', 'ゐ': 'wi', 'ゑ': 'we', 'を': 'wo',
    // syllabic n
    'ん': 'n',
    // vu: the hiragana letter (U+3094) plus the katakana form the table
    // has always accepted
    'ゔ': 'vu', 'ヴ': 'vu'
  };

  // Own-property check so inputs such as "toString" are passed through
  // instead of resolving to inherited Object.prototype members.
  if (Object.prototype.hasOwnProperty.call(romajiByKana, text)) {
    return romajiByKana[text];
  }
  return text;
}
// These "eat" the Y from YA, YU, YO.
// Declared with `var`: a bare assignment creates an implicit global and is a
// ReferenceError in strict-mode / module code.
var hungryYoonArray = [
  'SHI',
  'CHI',
  'JI'
];
/**
 * Rewrites one plain-romaji syllable into its Hepburn spelling when the two
 * differ (si -> shi, tya -> cha, ...). Syllables without a Hepburn variant,
 * and any other input, are returned unchanged.
 *
 * @param {string} romaji - one romaji syllable
 * @returns {string} the Hepburn spelling, or `romaji` itself
 */
function kanaToHepburn(romaji) {
  var hepburnBySyllable = {
    si: 'shi',
    zi: 'ji',
    ti: 'chi',
    di: 'ji',
    tu: 'tsu',
    du: 'zu',
    hu: 'fu',
    sya: 'sha',
    syu: 'shu',
    syo: 'sho',
    zya: 'ja',
    zyu: 'ju',
    zyo: 'jo',
    tya: 'cha',
    tyu: 'chu',
    tyo: 'cho',
    // ぢ + small ya/yu/yo: follows the di -> ji mapping above. These were
    // previously left as "dya"/"dyu"/"dyo".
    dya: 'ja',
    dyu: 'ju',
    dyo: 'jo'
  };

  // Own-property check keeps inherited names (e.g. "constructor") from
  // matching.
  if (Object.prototype.hasOwnProperty.call(hepburnBySyllable, romaji)) {
    return hepburnBySyllable[romaji];
  }
  return romaji;
}
/**
 * Reports whether `haystack.indexOf(needle)` finds a match.
 *
 * @param {*} needle - value to look for
 * @param {string|Array} haystack - anything exposing indexOf()
 * @returns {boolean} true when indexOf() does not return -1
 */
function isNeedleInHaystack(needle, haystack) {
  var position = haystack.indexOf(needle);
  return position >= 0;
}
//////////// UTILITIES ////////////////////////////////
/**
 * Prints one line of debug output into the page, followed by a line break.
 *
 * While the page is still being parsed the text is emitted with
 * document.write(), as before. Once parsing has finished, document.write()
 * would implicitly reopen the document and discard the current page, so the
 * same markup is appended to the end of <body> instead.
 *
 * NOTE: `blurb` is inserted as HTML (unchanged from the original behaviour);
 * do not pass untrusted text.
 *
 * @param {string} blurb - text (HTML) to print
 */
function debugPrint(blurb) {
  var html = blurb + '<BR>';
  if (document.readyState === 'loading') {
    document.write(html);
  } else {
    document.body.insertAdjacentHTML('beforeend', html);
  }
}
/**
 * Case-insensitive string assertion used by the self-test.
 *
 * When the two strings differ (ignoring case) the `bugs` flag is raised, the
 * problem is written into the document and an Error carrying the same message
 * is thrown. Otherwise nothing happens.
 *
 * @param {string} first - actual value
 * @param {string} second - expected value
 * @throws {Error} "<first> should equal <second>" on a mismatch
 */
function assertEquals(first, second) {
  var sameIgnoringCase = first.toUpperCase() == second.toUpperCase();
  if (!sameIgnoringCase) {
    bugs = true;
    var problem = first + " should equal " + second;
    document.write(problem + "<BR>");
    throw new Error(problem);
  }
}
/**
 * Fails loudly when a condition does not hold.
 *
 * @param {*} myBool - condition expected to be truthy
 * @throws {Error} "Error found" when `myBool` is falsy
 */
function assert(myBool) {
  if (myBool) return;
  // The previous `else ("Error found");` merely evaluated a string expression
  // and discarded it, so a failed assertion went unnoticed.
  throw new Error("Error found");
}