Jump to content

Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes/doc

From Wikipedia, the free encyclopedia

When and how these list pages are updated

[edit]

The date of the most recent update to the HGNC database is listed immediately below the tables on this webpage. The database appears to be updated daily.

Since the four human protein-coding gene index pages require regular updates to maintain currency with the HGNC database and to be of any use, the following Python script was written to fully automate the update process. The list pages and the corresponding gene symbols listed in the navbox are updated once per week by User:Seppi333Bot.

For questions, comments, or edit requests pertaining to the lists of human protein-coding genes, please use this page's talk page. Binding edits to the wikitables can only be implemented by revising the Python source code that generates the tables; this is because User:Seppi333Bot will overwrite edits by other users when it updates the HGNC database entries on all four list pages.

[edit]

See User:Seppi333/GeneListNLP. This algorithm detects links to non-gene-related pages.

Python 3 code for generating the wikitables

[edit]

Public PAWS link to the current algorithm: [1]

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import csv
import io
import sys
import os
import ftplib
from datetime import datetime
import pywikibot as pybot

# Timestamp captured once at start-up; forWP() formats it into the dates written to the list pages.
startTime = datetime.now()

# Setting the directory to the script/executable file location.
# sys.argv[0] carries no directory component when the script is started as
# "python script.py" from its own folder; os.path.dirname() would then return ''
# and os.chdir('') raises FileNotFoundError. Resolving to an absolute path first
# makes the directory lookup work regardless of how the script was invoked.
path = os.path.dirname(os.path.abspath(sys.argv[0]))
os.chdir(path)
print("Current working directory: ", os.getcwd(), "\n")

# Setting input/output variables
# readFile is the HGNC table saved by downloadGeneFile(); writeFile1-4 receive the
# wikitext of the four list pages generated by forWP().
readFile = 'protein-coding_gene.txt'
writeFile1 = 'Human protein-coding genes wikitext 1.txt'
writeFile2 = 'Human protein-coding genes wikitext 2.txt'
writeFile3 = 'Human protein-coding genes wikitext 3.txt'
writeFile4 = 'Human protein-coding genes wikitext 4.txt'


# The dictionary below contains gene symbols listed in the Dablinks tool as dictionary keys and piped links, sans brackets, as values.
# This dictionary also contains similar key-value pairs for non-gene-related articles that are located at a gene symbol's pagename.
# Comments are included to indicate which gene symbols are dablinks vs links to an unrelated article.
# The dictionary is used in the forWP() function to write a piped wikilink instead of a plain link if the gene symbol is a dictionary key.

# Approved gene symbols (links to disambiguation pages):
_DABLINK_SYMBOLS = (
	'A2M', 'AAAS', 'AACS', 'AASS', 'AATF', 'ABO', 'ABR', 'ABRA', 'ACAN', 'ACCS',
	'ACD', 'ACE', 'ACR', 'ADA', 'ADGB', 'ADK', 'ADM', 'ADNP', 'ADO', 'AEN',
	'AFM', 'AFP', 'AGA', 'AGK', 'AGL', 'AGPS', 'AGRP', 'AGT', 'AHRR', 'AIDA',
	'AIP', 'ALB', 'ALK', 'ALPG', 'AMH', 'AMN', 'AMT', 'ANG', 'APC', 'APCS',
	'APOE', 'APP', 'APRT', 'AQR', 'AR', 'ARC', 'ARNT', 'ARSA', 'ARSD', 'ARX',
	'ASIP', 'ASPA', 'ASPM', 'ASPN', 'ATIC', 'ATM', 'ATR', 'AUH', 'AVP', 'AXL',
	'B2M', 'BAD', 'BAX', 'BBX', 'BCAM', 'BCOR', 'BCR', 'BGN', 'BID', 'BIK',
	'BLK', 'BLM', 'BMF', 'BMP1', 'BMP2', 'BMP3', 'BOC', 'BOK', 'BPI', 'BRAF',
	'BSG', 'BSN', 'BSX', 'BTC', 'BTK', 'BTRC',
	'C2', 'C3', 'C5', 'C6', 'C7', 'C9', 'CA1', 'CA2', 'CA3', 'CA4',
	'CA6', 'CA7', 'CA9', 'CALR', 'CAMP', 'CAPS', 'CARF', 'CASR', 'CAST', 'CAT',
	'CAV1', 'CAV3', 'CBL', 'CBLB', 'CBX2', 'CBX5', 'CBX7', 'CCK', 'CCN1', 'CCNI',
	'CCR3', 'CCS', 'CD5', 'CDA', 'CDH1', 'CDH3', 'CDNF', 'CDR1', 'CDR2', 'CDV3',
	'CDX4', 'CEL', 'CER1', 'CFB', 'CFD', 'CFH', 'CFI', 'CFL2', 'CFP', 'CGA',
	'CGN', 'CHAT', 'CHGB', 'CHKB', 'CHM', 'CHRD', 'CIC', 'CISH', 'CIT', 'CKB',
	'CKM', 'CLC', 'CLPP', 'CLPS', 'CLU', 'CMAS', 'CMIP', 'CNP', 'COCH', 'COIL',
	'COMP', 'COPA', 'COPE', 'COQ3', 'COQ5', 'CP', 'CPD', 'CPE', 'CPM', 'CPO',
	'CPOX', 'CPQ', 'CPS1', 'CPT2', 'CPTP', 'CPZ', 'CR1', 'CR2', 'CRAT', 'CREM',
	'CRH', 'CRK', 'CRP', 'CRX', 'CS', 'CSK', 'CST3', 'CST6', 'CST7', 'CST8',
	'CSTA', 'CTH', 'CTNS', 'CTRC', 'CTRL', 'CTSA', 'CTSC', 'CTSL', 'CTSS', 'CUTC',
	'CYBA', 'CYCS', 'CYGB', 'CYLD',
	'DAO', 'DAP', 'DBH', 'DBI', 'DBNL', 'DBP', 'DBT', 'DCC', 'DCD', 'DCK',
	'DCN', 'DCPS', 'DCT', 'DCX', 'DDC', 'DDN', 'DDO', 'DDR2', 'DEK', 'DENR',
	'DERA', 'DES', 'DFFB', 'DHH', 'DIABLO', 'DLD', 'DMD', 'DMPK', 'DOLK', 'DPT',
	'DSE', 'DSP', 'DUT',
	'EBP', 'ECD', 'EDA', 'EED', 'EFS', 'EGF', 'EGFR', 'EHF', 'ELL', 'ELN',
	'EMB', 'EMC2', 'EMD', 'ENAH', 'ENG', 'EPO', 'EPOR', 'EPX', 'ERAS', 'ERC2',
	'ERF', 'ERG', 'ERH', 'ESD', 'EVC', 'EVL', 'EVPL', 'EXT2', 'EZR',
	'F10', 'F11', 'F12', 'F2', 'F3', 'F5', 'F7', 'F8', 'F9', 'FAH',
	'FAP', 'FAS', 'FASN', 'FAU', 'FBL', 'FECH', 'FER', 'FES', 'FEV', 'FGA',
	'FGB', 'FGG', 'FGR', 'FH', 'FLG', 'FN3K', 'FOS', 'FPGS', 'FRK', 'FST',
	'FTL', 'FTO', 'FUS',
	'GAA', 'GAK', 'GAL', 'GALK2', 'GALT', 'GAN', 'GART', 'GATC', 'GC', 'GCA',
	'GCG', 'GCK', 'GCNA', 'GDA', 'GEM', 'GFAP', 'GGH', 'GGN', 'GH1', 'GH2',
	'GHR', 'GIP', 'GK', 'GK2', 'GLA', 'GLS', 'GML', 'GNAS', 'GNE', 'GNS',
	'GP2', 'GP5', 'GP9', 'GPI', 'GPT', 'GPT2', 'GRASP', 'GRIP1', 'GRN', 'GRP',
	'GSC', 'GSN', 'GSR', 'GSS',
	'HAL', 'HBM', 'HBZ', 'HCCS', 'HCST', 'HDC', 'HDGF', 'HDX', 'HFE', 'HGD',
	'HGF', 'HGS', 'HJV', 'HLF', 'HLX', 'HMBS', 'HP', 'HPD', 'HPN', 'HPR',
	'HPSE', 'HR', 'HRC', 'HRG', 'HRK', 'HTT', 'HUNK', 'HYI',
	'IAPP', 'IDE', 'IDS', 'IGH', 'IGK', 'IGL', 'IHH', 'IK', 'IL2', 'IL3',
	'IL4', 'IL5', 'IL6', 'IL7', 'IL9', 'IL10', 'IL11', 'IL13', 'IL15', 'IL16',
	'IL18', 'IL19', 'IL20', 'IL21', 'IL22', 'IL24', 'IL25', 'IL26', 'IL32', 'IMPACT',
	'INA', 'INS', 'INSC', 'INTU', 'IPP', 'ITK', 'IVD', 'IVL',
	'JMY', 'JRK', 'JTB', 'JUN', 'JUP',
	'KCP', 'KDR', 'KERA', 'KHK', 'KIN', 'KIT', 'KL', 'KPRP', 'KPTN', 'KY',
	'LAT', 'LBH', 'LBP', 'LBR', 'LCAT', 'LCK', 'LCT', 'LHB', 'LIAS', 'LIF',
	'LIPA', 'LIPC', 'LIPE', 'LIPH', 'LPA', 'LPL', 'LPO', 'LPP', 'LRAT', 'LRP4',
	'LSR', 'LSS', 'LTA', 'LTB', 'LTBR', 'LTF', 'LTK', 'LUM',
	'MADD', 'MAF', 'MAFB', 'MAFF', 'MAG', 'MAK', 'MAL', 'MANBA', 'MASP2', 'MATK',
	'MAX', 'MAZ', 'MB', 'MBP', 'MCC', 'MCU', 'MDK', 'ME1', 'ME2', 'ME3',
	'MESD', 'MEST', 'MET', 'MFF', 'MGA', 'MGP', 'MIA', 'MIB1', 'MICA', 'MIF',
	'MIP', 'MLN', 'MLX', 'MMD', 'MME', 'MN1', 'MNT', 'MOG', 'MOS', 'MPG',
	'MPI', 'MPL', 'MPO', 'MR1', 'MRM2', 'MRO', 'MSC', 'MSRA', 'MT3', 'MTRR',
	'MYB',
	'NADK', 'NAIP', 'NANOG', 'NAPA', 'NARF', 'NASP', 'NBAS', 'NBN', 'NCL', 'NCS1',
	'NDN', 'NDP', 'NEB', 'NF1', 'NF2', 'NFIB', 'NGB', 'NGF', 'NIN', 'NLN',
	'NMB', 'NMI', 'NMS', 'NMT1', 'NMU', 'NNT', 'NOG', 'NPAT', 'NPL', 'NPPA',
	'NPPC', 'NPS', 'NPY', 'NQO2', 'NRAS', 'NRDC', 'NRM', 'NSF', 'NTM', 'NTS',
	'NUMB', 'NYX',
	'OAF', 'OAT', 'OCM', 'OGA', 'OGN', 'OGT', 'OMD', 'OMG', 'OMP', 'OPTN',
	'OS9', 'OSM', 'OSTC', 'OTC', 'OTP', 'OXT',
	'PAH', 'PAM', 'PBK', 'PC', 'PCCA', 'PDC', 'PEMT', 'PFAS', 'PGC', 'PGD',
	'PGF', 'PGP', 'PGR', 'PHB', 'PI3', 'PIGS', 'PIN4', 'PIP', 'PIR', 'PISD',
	'PKM', 'PLAT', 'PLG', 'PLN', 'PMEL', 'PML', 'PNN', 'PNP', 'POLB', 'POLE',
	'POR', 'PPID', 'PRCC', 'PRL', 'PRLR', 'PROC', 'PROZ', 'PRX', 'PRY', 'PSAP',
	'PSCA', 'PSD', 'PTEN', 'PTH', 'PTMS', 'PTN', 'PTS', 'PVR', 'PZP',
	'RAD1', 'RAD50', 'RAMAC', 'RAN', 'RARA', 'RARB', 'REN', 'RET', 'RETN', 'RGN',
	'RGR', 'RHCE', 'RHD', 'RILP', 'RLF', 'RORC', 'RP2', 'RPE', 'RPS5', 'RRAD',
	'RS1', 'RTCA', 'RYK',
	'SACS', 'SAG', 'SARAF', 'SAT1', 'SCAI', 'SCAP', 'SCD', 'SCT', 'SCX', 'SDF2',
	'SDS', 'SDSL', 'SET', 'SF1', 'SFN', 'SGCD', 'SHB', 'SHE', 'SHF', 'SHH',
	'SKI', 'SLA', 'SLK', 'SLN', 'SMO', 'SNCA', 'SNN', 'SON', 'SOS2', 'SP1',
	'SP2', 'SP3', 'SP4', 'SP5', 'SP6', 'SP7', 'SP8', 'SP9', 'SPIC', 'SPN',
	'SPP1', 'SPR', 'SPX', 'SRC', 'SRF', 'SRI', 'SRL', 'SRM', 'SRR', 'SRRT',
	'SSB', 'SST', 'SSX4', 'STAC', 'STAM', 'STAR', 'STH', 'STS', 'SYNC',
	'TANK', 'TAT', 'TAZ', 'TBCC', 'TBK1', 'TBP', 'TCAP', 'TDG', 'TEC', 'TEF',
	'TEK', 'TES', 'TESC', 'TF', 'TFG', 'TG', 'TGFA', 'TH', 'THPO', 'TIMELESS',
	'TKT', 'TNC', 'TNN', 'TNR', 'TPO', 'TPR', 'TPT1', 'TRA', 'TRB', 'TRD',
	'TRG', 'TRIL', 'TRIO', 'TRO', 'TSN', 'TSPO', 'TSR2', 'TST', 'TTK', 'TTL',
	'TTN', 'TTR', 'TUB', 'TXN', 'TYR',
	'UBB', 'UBD', 'UCN', 'UMPS', 'UNG', 'UNK', 'UST', 'UTY',
	'VCL', 'VCP', 'VCY', 'VDR', 'VIM', 'VIT', 'VWF',
	'WAC', 'WAS', 'WASL', 'WIZ', 'WLS', 'WRN',
	'XDH', 'XG', 'XK', 'XPC',
	'ZAN', 'ZYX',
)

# Approved gene symbols (links to non-gene pages):
_NON_GENE_PAGE_SYMBOLS = (
	'AAMP', 'AARD', 'ADSL', 'AK1', 'AK4', 'AK5', 'AK9', 'ALLC', 'APEH', 'APOD',
	'ARSF', 'ASL', 'ATRIP', 'AVEN', 'AVIL',
	'BATF', 'BBC3', 'BIVM', 'BMX', 'BRF1', 'BRF2',
	'CAD', 'CBS', 'CBSL', 'CCN2', 'CCN3', 'CCN4', 'CCNC', 'CCNY', 'CCSAP', 'CCT2',
	'CCT5', 'CEPT1', 'CES3', 'CGAS', 'CGB2', 'CGB3', 'CHGA', 'CHIA', 'CHML', 'CHP2',
	'CINP', 'CIPC', 'CKLF', 'CLK3', 'CLK4', 'CLMP', 'CLTA', 'CMC2', 'CNN3', 'CPA4',
	'CRB2', 'CRCP', 'CROT', 'CSF3', 'CSH2', 'CSN3', 'CST9', 'CTSH', 'CTSW', 'CTU2',
	'CUTA', 'CYREN',
	'DDT', 'DMTN', 'DMWD', 'DNA2', 'DSEL', 'DR1', 'DST', 'DSTN', 'DTL', 'DXO',
	'EBF3', 'ELL2', 'EME2', 'EN1', 'EN2', 'ENSA', 'EPOP', 'EPYC', 'ESAM', 'ESPN',
	'ETDA', 'ETV2', 'ETV3',
	'FARSA', 'FLNC', 'FMOD', 'FRY',
	'GALP', 'GATB', 'GATM', 'GBA', 'GFY', 'GGCT', 'GMDS', 'GMIP', 'GPS2', 'GPX2',
	'HECA', 'HPCA', 'HPX',
	'ICOS', 'ID3', 'IRGC', 'ISX',
	'KAT7', 'KAZN', 'KDSR', 'KEL', 'KIZ', 'KLB', 'KLLN', 'KMO', 'KNCN', 'KYNU',
	'LEP', 'LIPI', 'LIPK', 'LIPN', 'LOX', 'LTV1', 'LVRN', 'LXN',
	'MAFA', 'MAGIX', 'MAL2', 'MAVS', 'MBIP', 'MCAT', 'MGMT', 'MIB2', 'MIDN', 'MIOS',
	'MLEC', 'MLIP', 'MOK', 'MPEG1', 'MRAP', 'MRM1', 'MSLN', 'MSN', 'MSX2', 'MT4',
	'MTR', 'MVD', 'MVK', 'MVP', 'MYNN',
	'NACA', 'NAGA', 'NANP', 'NBL1', 'NEBL', 'NEMF', 'NES', 'NFIC', 'NGEF', 'NHS',
	'NKRF', 'NNAT', 'NPB', 'NRK', 'NRL', 'NVL', 'NXN',
	'ODAM', 'OSCAR', 'OSR2', 'OSTN',
	'PATJ', 'PCP2', 'PCTP', 'PDF', 'PIFO', 'PIGN', 'PIM2', 'PLAA', 'PMCH', 'PNOC',
	'POP1', 'POP4', 'PPCS', 'PPIE', 'PPIG', 'PPL', 'PREP', 'PRTG', 'PSD2', 'PSG1',
	'QPRT',
	'RAX', 'RBFA', 'RDX', 'REST', 'RFK', 'RGL4', 'RHO', 'RHOF', 'RHOV', 'RTL4',
	'RTL5', 'RTL9', 'RTP1', 'RTP2', 'RTP3', 'RTP4', 'RTP5',
	'SBSN', 'SCEL', 'SCIMP', 'SCLY', 'SDHC', 'SELL', 'SHD', 'SHPK', 'SI', 'SIAE',
	'SMS', 'SNCB', 'SP100', 'SPARC', 'SPRN', 'SRMS', 'SSX3', 'STYX', 'SUCO', 'SVOP',
	'SYK', 'SYP',
	'TACR2', 'TEPP', 'THEMIS', 'TIFA', 'TSR1', 'TXK',
	'UBC', 'UFC1', 'USF3',
	'VASP', 'VHLL', 'VIP', 'VMAC',
	'WAPL', 'WDCP', 'WTAP', 'WTIP',
)

# Maps each listed gene symbol to its piped link, sans brackets: "<SYMBOL> (gene)|<SYMBOL>".
# The value is derived from the key so that the link target and the displayed label can
# never drift apart (the hand-written table previously displayed 'NBAS' as 'NBArS').
# Insertion order follows the two tuples above, i.e. the order of the original table.
mistargetedLinkDictionary = {
	symbol: f'{symbol} (gene)|{symbol}'
	for symbol in _DABLINK_SYMBOLS + _NON_GENE_PAGE_SYMBOLS
}


# Saving the 'protein-coding_gene.txt' file in the current working directory
def downloadGeneFile():
	"""Download the HGNC protein-coding gene table into the current working directory.

	Connects to ftp.ebi.ac.uk, logs in with ftplib's default credentials, and saves
	the server's 'protein-coding_gene.txt' under the local file name held in readFile,
	overwriting any existing copy.

	Raises:
		ftplib.all_errors: if the connection, login, directory change, or transfer fails.
	"""
	print("Downloading \'" , readFile,"\' from the HGNC ftp server. This may take a few seconds.\n", sep="")
	# The context manager closes the FTP connection even if the transfer raises;
	# previously the control connection was never closed.
	with ftplib.FTP('ftp.ebi.ac.uk') as ftp:
		ftp.login()
		ftp.cwd('/pub/databases/genenames/new/tsv/locus_groups')
		with io.open(readFile, 'wb') as data:
			ftp.retrbinary('RETR protein-coding_gene.txt', data.write)

# Writing the wikitext files
def forWP():
	"""Write the wikitext of the four gene list pages and collect the navbox gene links.

	Reads the tab-separated HGNC file named by ``readFile`` and writes one
	wikitable per list page to ``writeFile1`` .. ``writeFile4``. The first row
	of the input is treated as the column header row and is not emitted; only
	rows whose status column (index 5) equals 'Approved' become table rows.
	A new output page is started at entries 5001, 10001 and 15001.

	Returns:
		A list of wikilink strings for the navbox: the gene symbols at entries
		1, 5000, 5001, 10000, 10001, 15000 and 15001 (those that exist),
		followed by the last approved gene symbol in the file.

	Raises:
		ValueError: if the input file contains no approved gene entries.
	"""
	# Generating date variable for the "Complete list" template and the citation template variable for the wikitables
	currentMonthYear = f'{startTime:%B} {startTime.year}'
	currentDate = f'{startTime.day} {startTime:%B} {startTime.year}'
	url = "https://www.genenames.org/download/statistics-and-files/"
	title = "Statistics & download files"
	publisher = "HUGO Gene Nomenclature Committee"
	tableReference = "{{safesubst:#tag:ref|{{cite web | title = " + title + " | url = " + url + " | website = www.genenames.org | publisher = " + publisher + " | accessdate = " + currentDate + " | date = " + currentDate + "}}}}"
	navboxGenes = []

	# Entry numbers whose gene symbols are the first/last rows of a list page (shown in the navbox)
	navboxIndices = (1, 5000, 5001, 10000, 10001, 15000, 15001)
	# Entry number that opens a new page -> number of the page that is closed at that point
	pageBreaks = {5001: 1, 10001: 2, 15001: 3}

	def geneLink(symbol):
		# Wikilink for a gene symbol; symbols listed in mistargetedLinkDictionary get their piped link instead
		return "[[" + mistargetedLinkDictionary.get(symbol, symbol) + "]]"

	def writeHeader(wiki):
		# Page preamble plus the wikitable caption and column headers
		print("{{Complete list|date=" + currentMonthYear + "}}", file=wiki)
		print("{{:Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes}}", file=wiki)
		print("{| class=\"wikitable sortable\" style=\"margin: 1em auto;\"", file=wiki)
		print("|+ {{nowrap|[[Human protein-coding genes]] listed in the [[HGNC]] database", tableReference, "}}", sep="", file=wiki)
		print("! scope=\"col\" | index", file=wiki)
		print("! scope=\"col\" | [[Gene symbol]]", file=wiki)
		print("! scope=\"col\" | HGNC ID", file=wiki)
		print("! scope=\"col\" | [[UniProt]] ID(s)", file=wiki)
		print("|-", file=wiki)

	def writeFooter(wiki, pageNumber):
		# Wikitable terminator, references section, and the category sort key for the given page number
		print("|}", file=wiki)
		print("\n==References==\n{{Reflist}}\n", file=wiki)
		print("[[Category:Genetics-related lists|Human protein-coding genes " + str(pageNumber) + "]]", file=wiki)

	# Setting initial index value; 0 means the input's header row has not been consumed yet
	i = 0
	# Stays None until the first approved gene has been written
	lastGeneSymbol = None

	# This code block reads the HGNC protein-coding_gene.txt file and writes the source code of all four list pages to different text files
	with open(readFile, 'r', encoding='utf-8') as geneFile:
		with open(writeFile1, 'w', encoding='utf-8') as wiki1, open(writeFile2, 'w', encoding='utf-8') as wiki2, open(writeFile3, 'w', encoding='utf-8') as wiki3, open(writeFile4, 'w', encoding='utf-8') as wiki4:
			wikiFiles = [wiki1, wiki2, wiki3, wiki4]
			# Setting the script to initially write wikitable data to the first file
			wiki = wikiFiles[0]

			for line in csv.reader(geneFile, delimiter="\t"):
				# The first row holds the column names: write the table header in its place
				if i == 0:
					writeHeader(wiki)
					i += 1
					continue

				# Blank lines are parsed as empty rows and carry no gene, so they are skipped.
				# NB: the "Approved" restriction excludes the "Entry withdrawn" symbols at the end of the 4th list page.
				if not line or line[5] != 'Approved':
					continue

				symbol = line[1]

				# Appending gene symbols to list of navbox gene symbol indices
				if i in navboxIndices:
					navboxGenes.append(geneLink(symbol))

				# Index
				print("|", i, sep="", file=wiki)
				# Gene symbol
				print("|", geneLink(symbol), sep="", file=wiki)
				# HGNC ID (the first five characters of the ID column are dropped)
				print("|{{HGNC|", line[0][5:], "}}", sep="", file=wiki)
				# UNIPROT IDs for proteins: one template per "|"-separated ID, joined by line breaks.
				# NOTE(review): an empty UniProt column still yields a single "{{uniprot|}}" call — confirm this is intended.
				uniprotIDs = line[25].split("|")
				print("|", "<br />".join("{{uniprot|" + uniprotID + "}}" for uniprotID in uniprotIDs), sep="", file=wiki)
				print("|-", file=wiki)

				i += 1

				# Setting the script to write wikitable data to the second, third, and fourth files as of entry 5001, 10001, and 15001
				if i in pageBreaks:
					closedPage = pageBreaks[i]
					writeFooter(wiki, closedPage)
					# wikiFiles is 0-based, so index `closedPage` is the next page's file
					wiki = wikiFiles[closedPage]
					writeHeader(wiki)

				# Sets the "lastGeneSymbol" variable to the current loop index's gene symbol
				lastGeneSymbol = symbol

			# Without any approved entry there is nothing valid to publish; fail with a clear message
			if lastGeneSymbol is None:
				raise ValueError("No approved gene entries were found in '" + readFile + "'")

			# Append the last gene symbol to the list of navbox indices
			navboxGenes.append(geneLink(lastGeneSymbol))

			# Manually writing out the wikitable footer, references section, and category for the fourth file
			writeFooter(wiki, 4)
	return navboxGenes

def runBot(navboxGenes, text=None, pauseOn=False, sandbox=False):
	"""Upload the generated wikitext to the four list pages and update the navbox cutoffs.

	Args:
		navboxGenes: list of eight wikilink strings; consecutive pairs are the
			first and last gene of list pages 1 to 4.
		text: edit summary for the list-page edits. When None the default
			summary is used; any other non-string value prints a warning and
			the default summary is used.
		pauseOn: when true, wait for the Enter key before each page is saved.
		sandbox: when true, edit the pages named with the WikiProject prefix
			instead of the "List of human protein-coding genes N" articles.

	Errors raised while writing a page are logged and the remaining pages are
	still attempted. The downloaded file and the four generated text files are
	deleted at the end regardless of whether the edits succeeded.
	"""
	# Functions for pausing and debugging errors during page writing
	def pause():
		return input("Press Enter to continue ...")

	def errorMessage():
		# Returns the description of the exception currently being handled so the caller can log it
		excType, excValue, excTraceback = sys.exc_info()
		print("Something went wrong when writing the page. =<")
		return 'Error: {}. {}, line: {}'.format(excType, excValue, excTraceback.tb_lineno)

	# Defining pages to edit
	if sandbox:
		pagePrefix = "Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes"
	else:
		pagePrefix = "List of human protein-coding genes "
	pageList = [pagePrefix + str(pageNumber) for pageNumber in range(1, 5)]
	navbox = "Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes"

	# Logging in
	site = pybot.Site('en', 'wikipedia')
	site.login()

	# Default edit summary
	comment = 'Manually running the Python script to perform an unscheduled update'

	if isinstance(text, str):
		comment = text
	elif text is not None:
		print("The input text must be a string")

	# All four files are opened up front so that a missing file aborts the run before any page is edited
	with open(writeFile1, 'r', encoding='utf-8') as page1, open(writeFile2, 'r', encoding='utf-8') as page2, open(writeFile3, 'r', encoding='utf-8') as page3, open(writeFile4, 'r', encoding='utf-8') as page4:
		for pageTitle, pageFile in zip(pageList, (page1, page2, page3, page4)):
			try:
				# Loading the gene list page and rewriting the page content with the current HGNC data
				wikipage = pybot.Page(site, pageTitle)
				print("Updating HGNC data in ", wikipage.title(), sep="")
				# At most 1,800,000 characters of the generated file are read
				wikipage.text = pageFile.read(1800000)
				print(wikipage.text)
				if pauseOn:
					pause()
				wikipage.save(summary=comment, minor=False)

			# Exception (not a bare except) so that Ctrl-C and SystemExit still stop the bot
			except Exception:
				pybot.logging.error(errorMessage())

	try:
		# Loading the navbox page and rewriting the page content with the current HGNC data
		wikipage = pybot.Page(site, navbox)
		print("Updating navbox page: ", wikipage.title(), sep="")

		# Build all four replacement ranges first so that a short navboxGenes list fails before any text is changed
		geneRanges = [navboxGenes[2 * k] + "–" + navboxGenes[2 * k + 1] for k in range(4)]

		navboxText = wikipage.text
		for pageNumber, geneRange in enumerate(geneRanges, start=1):
			# Matches the text between "N]] covers genes " and the next "<br />"
			pattern = r"(?<=" + str(pageNumber) + r"\]\] covers genes )[\S\d ]*?(?=<br \/>)"
			# A function replacement inserts the range literally, with no escape processing
			navboxText = re.sub(pattern, lambda match, literal=geneRange: literal, navboxText)
		wikipage.text = navboxText

		if pauseOn:
			pause()
		wikipage.save(summary="Updating gene list cutoffs", minor=False)

	except Exception:
		pybot.logging.error(errorMessage())

	# Deleting the downloaded and auto-generated text files
	print("Deleting the text files used in this bot script:", readFile, writeFile1, writeFile2, writeFile3, writeFile4, sep="\n")
	for fileName in (readFile, writeFile1, writeFile2, writeFile3, writeFile4):
		os.remove(fileName)
	print("\nFinished writing to Wikipedia\n")

def main():
	"""Run one complete update: download the HGNC file, build the wikitext, publish it, and report the runtime."""
	downloadGeneFile()
	# forWP() writes the four text files and returns the navbox gene links that runBot() needs
	runBot(forWP(), text="Performing gene list update", pauseOn=False, sandbox=False)
	elapsed = datetime.now() - startTime
	print("Time to execute script:", elapsed)

# Run the full update only when this file is executed as a script (not when it is imported)
if __name__ == "__main__":
	main()