Lompat ke isi

Wikipedia:ProyekWiki Biologi/Gen penyandi protein

Dari Wikipedia bahasa Indonesia, ensiklopedia bebas

Kode sumber Python 3 (menggunakan kerangka kerja Pywikibot) untuk membuat daftar otomatis seperti pada halaman: Daftar gen penyandi protein pada manusia/1

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import csv
import io
import sys
import os
import ftplib
from datetime import datetime
import pywikibot as pybot

# Timestamp captured once at start-up; forWP() formats it into the date
# strings written to the generated wikitext.
startTime = datetime.now()

# Setting the directory to the script/executable file location.
# sys.argv[0] can be a bare file name (e.g. when started as "python script.py"
# from the script's own directory); os.path.dirname() would then return ''
# and os.chdir('') raises FileNotFoundError. Resolving to an absolute path
# first guarantees a non-empty directory.
path = os.path.dirname(os.path.abspath(sys.argv[0]))
os.chdir(path)
print("Current working directory: ", os.getcwd(), "\n")

# Input/output file names, all relative to the current working directory.
# readFile is the HGNC table that gets downloaded and parsed; writeFile1..4
# receive the wikitext of the four generated list pages.
readFile = 'protein-coding_gene.txt'
writeFile1, writeFile2, writeFile3, writeFile4 = (
	f'Human protein-coding genes wikitext {pageNumber}.txt'
	for pageNumber in range(1, 5)
)


# Lookup of gene symbols whose plain wikilink would land on the wrong page.
# Keys are gene symbols; each value is the piped link (without the surrounding
# brackets) that forWP() writes instead of a plain link when the symbol is a key.
# Two kinds of symbols are covered, kept apart by the comments below:
#   * symbols listed in the Dablinks tool (the pagename is a disambiguation page)
#   * symbols whose pagename is occupied by an unrelated, non-gene article
# Every value has the form '<symbol> (gene)|<symbol>', so only the symbols are
# listed and the piped links are generated. Order matches the original listing.

mistargetedLinkDictionary = {
	geneSymbol: f'{geneSymbol} (gene)|{geneSymbol}'
	for symbolGroup in (
		# Approved gene symbols (links to disambiguation pages):
		'A2M AAAS AACS AASS AATF ABO ABR ABRA ACAN ACCS ACD ACE ACR ADA ADGB ADK ADM ADNP ADO AEN',
		'AFM AFP AGA AGK AGL AGPS AGRP AGT AHRR AIDA AIP ALB ALK ALPG AMH AMN AMT ANG APC APCS',
		'APOE APP APRT AQR AR ARC ARNT ARSA ARSD ARX ASIP ASPA ASPM ATIC ATM ATR AUH AVP AXL',
		'B2M BAD BAX BBX BCOR BCR BGN BID BIK BLK BLM BMF BMP1 BMP2 BMP3 BOC BOK BPI BRAF BSG',
		'BSN BSX BTC BTK BTRC',
		'C2 C3 C5 C6 C7 C9 CA1 CA2 CA3 CA4 CA6 CA7 CAMP CAPS CARF CASR CAST CAT CAV1 CAV3',
		'CBL CBLB CBX2 CBX5 CBX7 CCK CCN1 CCNI CCR3 CCS CD5 CDA CDH1 CDH3 CDNF CDR1 CDR2 CDV3 CDX4 CEL',
		'CER1 CFB CFD CFH CFI CFL2 CFP CGA CGN CHAT CHGB CHKB CHM CHRD CIC CIT CKB CKM CLC CLPP',
		'CLPS CLU CMAS CMIP CNP COCH COIL COMP COPA COPE COQ3 CP CPD CPE CPM CPO CPOX CPQ CPS1 CPT2',
		'CPTP CPZ CR1 CR2 CRAT CREM CRH CRK CRP CRX CS CSK CST3 CST6 CST7 CST8 CSTA CTH CTNS CTRC',
		'CTRL CTSA CTSC CTSS CUTC CYBA CYCS CYGB CYLD',
		'DAO DAP DBH DBI DBNL DBP DBT DCC DCD DCK DCN DCPS DCT DCX DDC DDN DDO DDR2 DEK DENR',
		'DERA DES DFFB DHH DIABLO DLD DMD DMPK DOLK DPT DSE DSP DUT',
		'EBP ECD EDA EED EFS EGF EGFR EHF ELL ELN EMB EMC2 EMD ENAH ENG EPO EPOR EPX ERAS ERC2',
		'ERF ERG ERH ESD EVC EVL EVPL EXT2',
		'F10 F11 F12 F2 F3 F5 F7 F8 F9 FAH FAP FAS FASN FAU FBL FECH FER FES FEV FGA',
		'FGB FGG FGR FH FLG FN3K FOS FPGS FRK FST FTL FTO FUS',
		'GAA GAK GAL GALK2 GALT GAN GART GATC GC GCA GCG GCK GCNA GDA GEM GFAP GGH GGN GH1 GHR',
		'GIP GK GK2 GLA GLS GML GNAS GNE GNS GP2 GP5 GP9 GPI GPT GPT2 GRASP GRIP1 GRN GRP GSC',
		'GSN GSR GSS',
		'HAL HBM HBZ HCCS HCST HDC HDGF HDX HFE HGD HGF HGS HJV HLF HLX HMBS HP HPD HPN HPR',
		'HPSE HR HRC HRG HRK HTT HUNK HYI',
		'IAPP IDE IDS IGH IGK IGL IHH IK IL2 IL3 IL4 IL5 IL6 IL7 IL9 IL10 IL11 IL13 IL15 IL16',
		'IL18 IL19 IL20 IL21 IL22 IL24 IL25 IL26 IL32 IMPACT INA INS INSC INTU IPP ITK IVD IVL',
		'JMY JRK JTB JUN JUP',
		'KCP KDR KERA KHK KIN KIT KL KPRP KPTN KY',
		'LAT LBH LBP LBR LCAT LCK LCT LHB LIAS LIF LIPA LIPC LIPE LPA LPL LPO LPP LRAT LRP4 LSR',
		'LSS LTA LTF LTK LUM',
		'MADD MAF MAFB MAFF MAG MAK MAL MANBA MASP2 MATK MAX MAZ MB MBP MCC MCU MDK ME1 ME2 ME3',
		'MESD MEST MET MFF MGA MGP MIA MIB1 MICA MIF MIP MLN MLX MMD MME MN1 MNT MOG MOS MPG',
		'MPI MPL MPO MR1 MRM2 MRO MSC MSRA MT3 MTRR MYB',
		'NADK NAIP NANOG NAPA NARF NASP NBAS NBN NCL NCS1 NDN NDP NEB NF1 NF2 NFIB NGB NGF NIN NLN',
		'NMB NMI NMS NMT1 NMU NNT NOG NPAT NPL NPPA NPPC NPS NQO2 NRAS NRDC NRM NSF NTM NTS NUMB',
		'NYX',
		'OAF OAT OCM OGA OGN OGT OMD OMG OMP OPTN OS9 OSM OSTC OTC OTP OXT',
		'PAH PAM PBK PC PCCA PDC PEMT PFAS PGC PGD PGF PGP PGR PHB PI3 PIGS PIP PIR PISD PKM',
		'PLAT PLG PLN PMEL PML PNN PNP POLB POLE POR PPID PRCC PRL PRLR PROC PROZ PRX PRY PSAP PSCA',
		'PSD PTEN PTH PTMS PTN PTS PVR PZP',
		'RAD1 RAD50 RAMAC RAN RARA RARB REN RET RGN RGR RHCE RHD RILP RLF RORC RP2 RPE RTCA RYK',
		'SACS SAG SARAF SAT1 SCAI SCAP SCD SCT SCX SDF2 SDS SDSL SET SF1 SFN SGCD SHB SHE SHF SHH',
		'SKI SLA SLK SLN SMO SNCA SNN SON SOS2 SP1 SP2 SP3 SP4 SP5 SP6 SP7 SP8 SP9 SPIC SPN',
		'SPP1 SPR SPX SRC SRF SRI SRL SRM SRR SRRT SSB SST SSX4 STAC STAM STAR STH STS SYNC',
		'TANK TAT TAZ TBCC TBK1 TBP TCAP TDG TEC TEF TEK TES TESC TF TFG TG TGFA TH THPO TIMELESS',
		'TKT TNC TNN TNR TPO TPR TPT1 TRA TRB TRD TRG TRIL TRIO TRO TSN TSPO TSR2 TST TTK TTL',
		'TTN TTR TUB TXN TYR',
		'UBB UBD UCN UMPS UNG UNK UST UTY',
		'VCL VCP VCY VDR VIM VIT VWF',
		'WAC WAS WASL WIZ WLS WRN',
		'XDH XG XK XPC',
		'ZAN ZYX',

		# Approved gene symbols (links to non-gene pages):
		'AAMP AARD ADSL AK1 AK4 AK5 AK9 ALLC APEH APOD ARSF ASL ATRIP AVEN AVIL',
		'BATF BBC3 BIVM BMX BRF1 BRF2',
		'CAD CBS CBSL CCN2 CCN3 CCN4 CCNC CCNY CCSAP CCT2 CCT5 CEPT1 CES3 CGAS CGB2 CGB3 CHGA CHIA CHML CHP2',
		'CINP CIPC CKLF CLK3 CLK4 CLMP CLTA CMC2 CNN3 CPA4 CRB2 CRCP CROT CSF3 CSH2 CSN3 CST9 CTSH CTSW CTU2',
		'CUTA CYREN',
		'DDT DMTN DMWD DNA2 DSEL DR1 DST DSTN DTL DXO',
		'EBF3 ELL2 EME2 EN1 EN2 ENSA EPOP EPYC ESAM ESPN ETDA ETV2 ETV3',
		'FARSA FLNC FMOD FRY',
		'GALP GATB GATM GBA GFY GGCT GMDS GMIP GPS2 GPX2',
		'HECA HPCA HPX',
		'ICOS ID3 IRGC ISX',
		'KAT7 KAZN KDSR KEL KIZ KLB KLLN KMO KNCN KYNU',
		'LEP LIPI LIPK LIPN LOX LTV1 LVRN LXN',
		'MAFA MAGIX MAL2 MAVS MBIP MCAT MGMT MIB2 MIDN MIOS MLEC MLIP MOK MPEG1 MRAP MRM1 MSLN MSN MSX2 MT4',
		'MTR MVD MVK MVP MYNN',
		'NACA NAGA NANP NBL1 NEBL NEMF NES NFIC NGEF NHS NKRF NNAT NPB NRK NRL NVL NXN',
		'ODAM OSCAR OSR2 OSTN',
		'PATJ PCP2 PCTP PDF PIFO PIGN PIM2 PLAA PMCH PNOC POP1 POP4 PPCS PPIE PPIG PPL PREP PRTG PSD2 PSG1',
		'QPRT',
		'RAX RBFA RDX REST RFK RGL4 RHO RHOF RHOV RTL4 RTL5 RTL9 RTP1 RTP2 RTP3 RTP4 RTP5',
		'SBSN SCEL SCIMP SCLY SDHC SELL SHD SHPK SI SIAE SMS SNCB SP100 SPARC SPRN SRMS SSX3 STYX SUCO SVOP',
		'SYK SYP',
		'TACR2 TEPP THEMIS TIFA TSR1 TXK',
		'UBC UFC1 USF3',
		'VASP VHLL VIP VMAC',
		'WAPL WDCP WTAP WTIP',
	)
	for geneSymbol in symbolGroup.split()
}


# Saving the 'protein-coding_gene.txt' file in the current working directory
def downloadGeneFile():
	"""Download the HGNC protein-coding gene table into the working directory.

	Logs in anonymously to ``ftp.ebi.ac.uk``, changes to the HGNC locus-group
	directory and stores ``protein-coding_gene.txt`` under the local file name
	held in the module-level ``readFile``. An existing local file is overwritten.

	Connection, login and transfer errors raised by ``ftplib`` (or ``OSError``
	from the socket/file layer) are not caught here and propagate to the caller.
	"""
	print("Downloading \'" , readFile,"\' from the HGNC ftp server. This may take a few seconds.\n", sep="")
	# The FTP object is used as a context manager so the control connection is
	# shut down (QUIT + close) even when the transfer raises; previously the
	# connection was never closed.
	with ftplib.FTP('ftp.ebi.ac.uk') as ftp:
		ftp.login()
		ftp.cwd('/pub/databases/genenames/new/tsv/locus_groups')
		# Binary mode keeps the downloaded bytes untouched; forWP() decodes the
		# file as UTF-8 when it reads it back.
		with open(readFile, 'wb') as data:
			ftp.retrbinary('RETR protein-coding_gene.txt', data.write)

# Building the wikilink for a gene symbol
def _geneWikilink(symbol):
	"""Return ``[[...]]`` markup for *symbol*.

	Symbols that are keys of ``mistargetedLinkDictionary`` get the piped link
	stored there; every other symbol is linked by its plain name.
	"""
	return "[[" + mistargetedLinkDictionary.get(symbol, symbol) + "]]"


# Writing the page preamble and the wikitable caption/column headers
def _writeTableHeader(wiki, currentMonthYear, tableReference):
	"""Write the templates, table opening, caption and column headers to *wiki*.

	wiki -- open text file receiving the wikitext
	currentMonthYear -- value for the ``date`` parameter of {{Complete list}}
	tableReference -- citation markup appended to the table caption
	"""
	headerLines = [
		"{{Complete list|date=" + currentMonthYear + "}}",
		"{{:Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes}}",
		"{| class=\"wikitable sortable\" style=\"margin: 1em auto;\"",
		"|+ {{nowrap|[[Human protein-coding genes]] listed in the [[HGNC]] database" + tableReference + "}}",
		"! scope=\"col\" | index",
		"! scope=\"col\" | [[Gene symbol]]",
		"! scope=\"col\" | HGNC ID",
		"! scope=\"col\" | [[UniProt]] ID(s)",
		"|-",
	]
	print("\n".join(headerLines), file=wiki)


# Writing the wikitable footer and the references section
def _writeTableFooter(wiki):
	"""Close the wikitable in *wiki* and append the references section."""
	print("|}", file=wiki)
	print("\n==References==\n{{Reflist}}", file=wiki)


# Writing the wikitext files
def forWP():
	"""Convert the downloaded HGNC table into the wikitext of the four list pages.

	Reads the tab-separated file named by ``readFile``. The first row is
	treated as the column-header row and skipped. Every later row whose sixth
	column equals 'Approved' becomes one table row holding a running index,
	the gene symbol link (column 2), the HGNC ID (column 1 with its first five
	characters removed) and the pipe-separated UniProt IDs (column 26).
	Entries 1-5000 go to ``writeFile1``, 5001-10000 to ``writeFile2``,
	10001-15000 to ``writeFile3`` and the remainder to ``writeFile4``.

	Returns the list of gene wikilinks found at indices 1, 5000, 5001, 10000,
	10001, 15000 and 15001 (those that exist), followed by the link of the
	last approved gene.

	Raises ValueError when the table contains no approved entry, so that
	empty pages are never handed on for upload.
	"""
	# Generating date variable for the "Complete list" template and the citation template variable for the wikitables
	# NOTE: %B yields the month name of the active locale
	currentMonthYear = f'{startTime:%B} {startTime.year}'
	currentDate = f'{startTime.day} {startTime:%B} {startTime.year}'
	url = "https://www.genenames.org/download/statistics-and-files/"
	title = "Statistics & download files"
	publisher = "HUGO Gene Nomenclature Committee"
	tableReference = "{{safesubst:#tag:ref|{{cite web | title = " + title + " | url = " + url + " | website = www.genenames.org | publisher = " + publisher + " | accessdate = " + currentDate + " | date = " + currentDate + "}}}}"

	navboxGenes = []
	# Indices of the first and last table entry of each list page
	navboxIndices = {1, 5000, 5001, 10000, 10001, 15000, 15001}
	# Stays None until the first approved row has been written
	lastGeneSymbol = None
	# Index that the next approved gene will receive
	i = 1

	# This code block reads the HGNC protein-coding_gene.txt file and writes the source code of all four list pages to different text files
	with open(readFile, 'r', encoding='utf-8') as geneTable:
		with open(writeFile1, 'w', encoding='utf-8') as wiki1, \
				open(writeFile2, 'w', encoding='utf-8') as wiki2, \
				open(writeFile3, 'w', encoding='utf-8') as wiki3, \
				open(writeFile4, 'w', encoding='utf-8') as wiki4:
			# Output file that takes over once the running index reaches the given value
			nextPage = {5001: wiki2, 10001: wiki3, 15001: wiki4}
			# Setting the script to initially write wikitable data to the first file
			wiki = wiki1

			rows = csv.reader(geneTable, delimiter="\t")
			# The first row holds the column names, not a gene
			next(rows, None)
			_writeTableHeader(wiki, currentMonthYear, tableReference)

			for row in rows:
				# NB: the 'Approved' restriction excludes the "Entry withdrawn" symbols at the end of the 4th list page.
				# Rows too short to carry a status column (e.g. blank lines) are skipped as well.
				if len(row) <= 5 or row[5] != 'Approved':
					continue

				symbol = row[1]
				link = _geneWikilink(symbol)

				# Appending gene symbols to list of navbox gene symbol indices
				if i in navboxIndices:
					navboxGenes.append(link)

				# Index
				print("|", i, sep="", file=wiki)
				# Gene symbol
				print("|", link, sep="", file=wiki)
				# HGNC ID
				print("|{{HGNC|", row[0][5:], "}}", sep="", file=wiki)
				# UNIPROT IDs for proteins, one template per ID separated by line breaks
				uniprotCell = "<br />".join("{{uniprot|" + accession + "}}" for accession in row[25].split("|"))
				print("|", uniprotCell, sep="", file=wiki)
				print("|-", file=wiki)

				i += 1

				# Setting the script to write wikitable data to the second, third, and fourth files as of entry 5001, 10001, and 15001
				if i in nextPage:
					_writeTableFooter(wiki)
					wiki = nextPage[i]
					_writeTableHeader(wiki, currentMonthYear, tableReference)

				# Remembering the most recently written gene symbol
				lastGeneSymbol = symbol

			if lastGeneSymbol is None:
				raise ValueError("No approved gene entries were found in '" + readFile + "'")

			# Append the last gene symbol to the list of navbox indices
			navboxGenes.append(_geneWikilink(lastGeneSymbol))

			# Writing out the wikitable footer and references section for the last file in use
			_writeTableFooter(wiki)
	return navboxGenes

def runBot(navboxGenes, text=None, pauseOn=False, sandbox=False):
	"""Upload the generated wikitext to the four list pages and refresh the navbox.

	navboxGenes -- list of at least eight gene wikilinks; consecutive pairs
		are used as the first and last gene of list pages 1 to 4
	text -- edit summary for the list pages; the default summary is used when
		this is None, and a non-string value is reported and ignored
	pauseOn -- when true, wait for Enter before every save
	sandbox -- when true, write to project-space pages instead of the articles

	A failure while updating one page is reported and logged, and the
	remaining pages are still attempted. KeyboardInterrupt and SystemExit are
	not intercepted, so Ctrl-C (for instance at the pause prompt) aborts the
	run. The downloaded and generated text files are deleted at the end.
	"""
	# Functions for pausing and debugging errors during page writing
	def pause():
		return input("Press Enter to continue ...")

	def errorMessage():
		# Reports the exception currently being handled on the console and
		# returns the detail line so that the caller can log it as well
		excType, excValue, excTraceback = sys.exc_info()
		detail = 'Error: {}. {}, line: {}'.format(excType, excValue, excTraceback.tb_lineno)
		print("Something went wrong when writing the page. =<")
		print(detail)
		return detail

	# Defining pages to edit
	if sandbox:
		pagePrefix = "Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes"
	else:
		pagePrefix = "List of human protein-coding genes "
	pageList = [pagePrefix + str(pageNumber) for pageNumber in range(1, 5)]
	navbox = "Wikipedia:WikiProject Molecular Biology/Molecular and Cell Biology/Human protein-coding genes"

	# Logging in
	site = pybot.Site('en', 'wikipedia')
	site.login()

	# Default edit summary
	comment = 'Manually running the Python script to perform an unscheduled update'

	if isinstance(text, str):
		comment = text
	elif text is not None:
		print("The input text must be a string")

	# All four files are opened before the first edit, so a missing file stops the run before any page is changed
	with open(writeFile1, 'r', encoding='utf-8') as page1, \
			open(writeFile2, 'r', encoding='utf-8') as page2, \
			open(writeFile3, 'r', encoding='utf-8') as page3, \
			open(writeFile4, 'r', encoding='utf-8') as page4:
		for pageTitle, pageFile in zip(pageList, (page1, page2, page3, page4)):
			try:
				# Loading the gene list page and rewriting the page content with the current HGNC data
				wikipage = pybot.Page(site, pageTitle)
				print("Updating HGNC data in ", wikipage.title(), sep="")
				# At most 1,800,000 characters are read per page
				# NOTE(review): presumably a guard for the wiki's page-size limit -- confirm
				wikipage.text = pageFile.read(1800000)
				print(wikipage.text)
				if pauseOn:
					pause()
				wikipage.save(summary=comment, minor=False)
			except Exception:
				pybot.logging.error(errorMessage())

	try:
		# Loading the navbox page and rewriting the page content with the current HGNC data
		wikipage = pybot.Page(site, navbox)
		print("Updating navbox page: ", wikipage.title(), sep="")

		# One "first gene–last gene" range per list page
		replacements = [navboxGenes[2 * k] + "–" + navboxGenes[2 * k + 1] for k in range(4)]

		navboxText = wikipage.text
		for pageNumber, replacement in enumerate(replacements, start=1):
			# Matches the text between "<n>]] covers genes " and the following "<br />"
			pattern = r"(?<={}\]\] covers genes )[\S\d ]*?(?=<br \/>)".format(pageNumber)
			# A callable replacement is inserted verbatim, without backslash-escape processing
			navboxText = re.sub(pattern, lambda match, replacement=replacement: replacement, navboxText)
		wikipage.text = navboxText

		if pauseOn:
			pause()
		wikipage.save(summary="Updating gene list cutoffs", minor=False)
	except Exception:
		pybot.logging.error(errorMessage())

	# Deleting the downloaded and auto-generated text files
	usedFiles = (readFile, writeFile1, writeFile2, writeFile3, writeFile4)
	print("Deleting the text files used in this bot script:", *usedFiles, sep="\n")
	for usedFile in usedFiles:
		os.remove(usedFile)
	print("\nFinished writing to Wikipedia\n")

def main():
	"""Run the whole update: download the HGNC table, build the wikitext, upload it.

	Prints the time elapsed since ``startTime`` once the upload step returns.
	"""
	downloadGeneFile()
	cutoffLinks = forWP()
	runBot(
		cutoffLinks,
		text="Performing gene list update",
		pauseOn=False,
		sandbox=False,
	)
	elapsed = datetime.now() - startTime
	print("Time to execute script:", elapsed)

# Calling the functions defined above.
# The guard restricts the call to main() to direct execution of this file as a script;
# when the file is imported as a module, main() is not called.
if __name__ == "__main__":
	main()