User:Richiez/wiki-href

From Wikipedia, the free encyclopedia
Jump to: navigation, search

#!/usr/bin/ruby
#
# Walk a wiki article and replace {{pmid XXXX}} and {{isbn XXXX}} with
# Harvard-style references.
# Each pmid/isbn tag generates {{Harvcol | Lastname [...] | year}} in the text
#   and {{anchor ..}} {{cite ...}} in the References section.
# A -txt or -nb suffix generates the Harvcoltxt or Harvcolnb variant
#   respectively, e.g. {{pmid-nb 123456}}.
# Citation data is fetched from http://diberri.dyndns.org/
 
require 'cgi'
require 'net/http'
 
# Host name of the template filler service; get_session opens its HTTP
# session to this host on port 80.
Host = "diberri.dyndns.org"
 
# Returns the shared Net::HTTP session for Host, creating it on first use.
#
# When the http_proxy environment variable has the form http://ADDR:PORT the
# session is routed through that proxy; otherwise a plain session to Host on
# port 80 is created. The session is cached in $session and the captured
# proxy settings are kept (as strings) in $proxy_addr / $proxy_port.
def get_session()
  return $session if $session

  # \d+ rather than \d*: a proxy URL without a port number must not produce an
  # empty proxy port.
  if ENV["http_proxy"] =~ %r{http://(.*):(\d+)}
    $proxy_addr = $1
    $proxy_port = $2
    # Net::HTTP expects the proxy port as an Integer, not the captured String.
    $session = Net::HTTP.new(Host, 80, $proxy_addr, $proxy_port.to_i)
  else
    $session = Net::HTTP.new(Host, 80)
  end
  $session
end
 
# Looks up a book by ISBN through the template filler.
#
# style - suffix taken from the wiki tag (e.g. "", "-nb", "-txt"); handed to
#         lookup unchanged.
# isbn  - ISBN as written in the article text.
#
# Returns whatever lookup returns for the constructed request path.
def lookup_isbn(style,isbn)
  # The id comes straight from article text: trim stray whitespace and
  # percent-encode it so it cannot break or extend the query string.
  id = URI.encode_www_form_component(isbn.to_s.strip)
  path = "/cgi-bin/templatefiller/index.cgi?ddb=&type=isbn&id=" + id
  lookup(style, path)
end
# Looks up a journal article by PubMed id through the template filler.
#
# style - suffix taken from the wiki tag (e.g. "", "-nb", "-txt"); handed to
#         lookup unchanged.
# pmid  - PubMed id as written in the article text.
#
# Returns whatever lookup returns for the constructed request path.
def lookup_pmid(style,pmid)
  # The id comes straight from article text: trim stray whitespace and
  # percent-encode it so it cannot break or extend the query string.
  id = URI.encode_www_form_component(pmid.to_s.strip)
  path = "/cgi-bin/templatefiller/index.cgi?ddb=&type=pubmed_id&id=" + id
  lookup(style, path)
end
 
# Fetches a filled-in citation template from the template filler and derives
# the matching Harvard-style reference from it.
#
# style - tag suffix; a value matching /-txt/ selects Harvcoltxt, /-nb/
#         selects Harvcolnb, anything else Harvcol.
# path  - request path (including query string) on Host.
#
# Returns [cite_template, harv_template, reftag], or nil when the HTTP status
# is not 2xx or the response holds no {{...}} template inside a <textarea>.
def lookup(style,path)
  session = get_session

  # Deliberately no "Accept-Encoding" header: Net::HTTP only inflates
  # gzip/deflate bodies when it added that header itself, so sending our own
  # could leave res.body compressed and unmatchable below.
  headers = {
    "User-Agent" => "Dillo/0.8.5-i18n-misc",
    "Referer" => "http://www.google.com/language_tools?hl=en",
    "Accept-Language" => "en-us,en;q=0.5",
    "Accept-Charset" => "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
    "Keep-Alive" => "300"
  }
  res = session.get(path, headers)

  unless /^2\d\d/ =~ res.code
    $stderr.printf "HTTP problem: %s %s\n", res.code, res.message
    harv_wwwoffle_request(Host + path) if /^5\d\d/ =~ res.code
    return nil
  end

  found = /<textarea.*?>\{\{(.*?)\}\}/.match(res.body)
  return nil unless found

  # Shield &ndash; while the remaining HTML entities are unescaped, then
  # restore it, so the dash stays an entity in the wiki text.
  tmpl = found[1].gsub(/&ndash;/, "--")
  tmpl = CGI.unescapeHTML(tmpl)
  tmpl = tmpl.gsub(/--/, "&ndash;")

  authors, year = harv_authors_and_year(tmpl)
  surnames = harv_surnames(authors)

  stl = case style
        when /-txt/ then "Harvcoltxt"
        when /-nb/ then "Harvcolnb"
        else "Harvcol"
        end

  harv = "{{#{stl} |" + surnames.join(" |") + " |" + year + " }}"
  reftag = "CITEREF#{surnames.join('_')}_#{year}".gsub(" ", "_")

  ["{{" + tmpl + "}}", harv, reftag]
end

# Runs the external wwwoffle command with +url+ as its only argument
# (presumably asking the local WWWOFFLE proxy to fetch it -- confirm).
# The argument vector form bypasses the shell, so characters in the URL are
# never interpreted; a missing or failing command is reported, not raised.
def harv_wwwoffle_request(url)
  IO.popen(["wwwoffle", url], &:read)
rescue SystemCallError => e
  $stderr.puts "wwwoffle failed: #{e.message}"
end

# Extracts the author list and the year from the |-separated fields of a
# citation template. Only fields actually named "author" / "year" count, so
# "coauthors=", "authorlink=" or "origyear=" cannot overwrite them.
# Returns [authors, year]; either is "" when the field is absent.
def harv_authors_and_year(tmpl)
  authors = ""
  year = ""
  tmpl.split("|").each do |field|
    if (m = /\A\s*author\s*=(.*)/.match(field))
      authors = m[1]
    elsif (m = /\A\s*year\s*=\s*(\d+)/.match(field))
      year = m[1]
    end
  end
  [authors, year]
end

# Reduces a comma-separated author list to at most four surnames, which is
# what the Harv templates expect. ''et al'' entries are skipped and the last
# word (initials / first name) of each author is dropped; a single-word name
# is kept as it is instead of collapsing to an empty string.
def harv_surnames(authors)
  authors.split(",").first(4).reject { |name| name =~ /''et al''/ }.map do |name|
    words = name.split(" ")
    (words.size > 1 ? words[0..-2] : words).join(" ")
  end
end
 
# Replaces one {{pmid ...}} / {{isbn ...}} tag with a Harvard reference.
#
# orig  - the complete tag text as matched in the article.
# style - suffix following pmid/isbn in the tag (e.g. "", "-nb", "-txt").
# id    - the identifier given in the tag.
#
# On success the citation is recorded once per tag in $references as
# [tag, cite_template, reftag], and the Harvard template is returned with the
# original tag appended in an HTML comment. Returns orig unchanged when the
# tag is neither pmid nor isbn or when the lookup yields nothing.
def replace_ref(orig,style,id)
  case orig
  when /pmid/i
    type = "pmid"
    res = lookup_pmid(style, id)
  when /isbn/i
    # Was "pmid": an ISBN and a PubMed id with the same digits then shared one
    # tag, and the second citation never reached the references list.
    type = "isbn"
    res = lookup_isbn(style, id)
  end

  return orig unless res
  template, harv, reftag = res

  tag = type + id.to_s
  $references.push [tag, template, reftag] unless $references.assoc(tag)

  harv + "<!-- #{orig} -->"
end
 
# Formats one $references entry ([tag, citation, reftag]) as a bullet line:
# an {{anchor}} carrying the reftag, followed by the {{cite ..}} template.
def do_cite(tref)
  _tag, cite_template, anchor_name = tref

  format("\n* {{anchor|%s}} %s", anchor_name, cite_template)
end
 
# Rewrites one article: every {{pmid ..}} / {{isbn ..}} tag is replaced via
# replace_ref, and the citations collected in $references are inserted as a
# generated block. The resulting text is written to standard output.
#
# file - an open, readable IO holding the article text.
def ref_file(file)
  lines = file.readlines

  # The first two lines (title / meta-information) are left untouched.
  # drop(2) also copes with files shorter than two lines, for which
  # lines[2..-1] would be nil.
  lines.drop(2).each do |line|
    # split {{(pmid|isbn)(|-nb|-txt) id-number}} into parts
    line.gsub!(/\{\{(pmid|isbn)(.*?)\s+(.*?)\}\}/i) do |orig|
      replace_ref orig, Regexp.last_match(2), Regexp.last_match(3)
    end
  end

  # join, not to_s: Array#to_s yields the inspect form on Ruby 1.9 and later.
  txt = lines.join
  references = "<!-- automatically generated references block -->\n"
  references = references + $references.map { |el| do_cite(el) }.join
  references = references + "\n<!-- end of automatically generated references block -->\n"

  # Accept the heading with or without blanks inside the == markers.
  heading = /^==[ \t]*References[ \t]*==/
  stub_or_category = /(\{\{.*stub\}\}|\[\[Category:.*\]\])/

  # insert at top of References section, user must sort it manually
  if heading =~ txt
    txt.sub!(heading) { |orig| orig + references }
  # no reference section yet? Create one right before stubs and Categories
  elsif stub_or_category =~ txt
    txt.sub!(/(\{\{.*stub\}\}|\[\[Category:.*\]\]).*/) do |orig|
      "==References==\n" + references + "\n" + orig
    end
  else
    txt = txt + "==References==\n" + references
  end
  printf "%s", txt
end
 
# Process every article named on the command line. Each file starts with an
# empty $references list; the block form of File.open closes the handle even
# if ref_file raises.
ARGV.each do |arg|
  $references = []
  File.open(arg, "r") { |fl| ref_file fl }
end