Jump to content

User:Drinibot/ExtractWikilinks.py

From Wikipedia, the free encyclopedia
import re
import subprocess
# Input HTML dump to scan and output file that receives one wikilink per line.
rawfilename = "raw.html"
linksfilename = "links.txt"

# Matches an absolute wiki URL of the form ``....org/wiki/<title>"`` and
# captures the title, i.e. everything up to the closing double quote.
# The pattern is intentionally NOT anchored to the whole line: the earlier
# ``.*`` ... ``.*$`` wrapping made findall() return at most one (the last)
# link per line, silently dropping every other link on that line.
# The dot before ``org`` is escaped so it only matches a literal ".".
regex = re.compile(r'\.org/wiki/(?P<oldcat>[^"\n]*)"')


def extract_wikilinks(line):
    """Return the wikilinks found in *line* as a list of ``[[Title]]`` strings.

    Every ``.org/wiki/<title>"`` occurrence in the line is considered, in
    order of appearance.  Titles containing ``Special:`` are skipped.
    Titles are emitted exactly as they appear in the URL (no unquoting or
    underscore conversion is performed).

    Returns an empty list when the line contains no matching link.
    """
    return [
        "[[" + title + "]]"
        for title in regex.findall(line)
        if "Special:" not in title
    ]


def main(raw_path=rawfilename, links_path=linksfilename):
    """Read *raw_path* line by line and write its wikilinks to *links_path*.

    Each extracted wikilink is echoed to stdout and written to the output
    file followed by a newline.  Both files are closed even if an error
    occurs while processing.
    """
    # The input is opened first so that a missing input file fails before
    # the output file is created or truncated.
    with open(raw_path, 'r') as fi, open(links_path, 'w') as li:
        for line in fi:
            for wl in extract_wikilinks(line):
                # Single-argument print(...) behaves the same under the
                # Python 2 print statement and the Python 3 print function.
                print(wl)
                li.write(wl + "\n")


if __name__ == "__main__":
    main()