Jump to content

User:EdwardsBot/Configuration: Difference between revisions

From Wikipedia, the free encyclopedia
Content deleted Content added
→‎deliverybot-2.py: updated section
→‎deliverybot-2.py: updated section
Line 5: Line 5:


import codecs
import codecs
import MySQLdb
import os
import os
import re
import re
Line 13: Line 12:


# Define variables
# Define variables
dbname = settings.dbname
directory = settings.directory
username = settings.username
username = settings.username
user_namespace = settings.user_namespace
user_namespace = settings.user_namespace
Line 29: Line 28:
def kill_self():
def kill_self():
os._exit(0)
os._exit(0)

def check_replag(cursor):
cursor.execute('''
/* deliverybot-2.py SLOW_OK */
SELECT
UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp)
FROM recentchanges
ORDER BY rc_timestamp DESC
LIMIT 1;
''')
return cursor.fetchone()[0]


def check_status(page):
def check_status(page):
Line 51: Line 39:
return
return


def top_user(cursor, dbname, namespace, page):
def top_user(wiki, page):
params = {'action' : 'query',
cursor.execute('''
'prop' : 'revisions',
/* deliverybot-2.py SLOW_OK */
'rvprop' : 'user',
SELECT
'titles' : page}
rev_user_text
req = wikitools.api.APIRequest(wiki, params)
FROM revision
response = req.query()
JOIN page
latest_editor = response[u'query'][u'pages'].popitem()[1][u'revisions'][0][u'user']
ON rev_page = page_id
return latest_editor
JOIN toolserver.namespace
ON dbname = %s
AND page_namespace = ns_id
WHERE ns_name = %s
AND page_title = %s
ORDER BY rev_timestamp DESC
LIMIT 1;
''' , (dbname,
re.sub('_', ' ', user_namespace),
re.sub(' ', '_', spam).lstrip(user_namespace + ':')))
return u'%s' % unicode(cursor.fetchone()[0].replace(' ', '_'), 'utf-8')


def trusted_users(cursor, dbname, namespace, page):
def trusted_users(wiki, page):
trusted_users = []
cursor.execute('''
/* deliverybot-2.py SLOW_OK */
SELECT DISTINCT
pl_title
FROM pagelinks
JOIN page
ON pl_from = page_id
JOIN toolserver.namespace
ON dbname = %s
AND page_namespace = ns_id
WHERE ns_name = %s
AND page_title = %s
AND pl_namespace IN (2,3);
''' , (dbname,
re.sub('_', ' ', user_namespace),
re.sub(' ', '_', access_list).lstrip(user_namespace + ':')))
return [u'%s' % unicode(row[0], 'utf-8') for row in cursor.fetchall()]


params = {'action' : 'query',
def excluded_users(cursor, dbname, username):
'prop' : 'links',
cursor.execute('''
'titles' : page,
/* deliverybot-2.py SLOW_OK */
'pllimit' : 500,
SELECT DISTINCT
'plnamespace' : 2}
pl_title
req = wikitools.api.APIRequest(wiki, params)
FROM pagelinks
response = req.query()
JOIN page
users = response[u'query'][u'pages'].popitem()[1][u'links']
ON pl_from = page_id
for entry in users:
JOIN toolserver.namespace
trusted_users.append(entry[u'title'].split(':', 1)[1])
ON dbname = '%s'
return trusted_users
AND page_namespace = ns_id

WHERE ns_name = 'Wikipedia'
def excluded_users(wiki, username):
AND page_title IN ('Bots/Opt-out', 'Bots/Opt-out/%s')
excluded_users = []
AND pl_namespace IN (2,3);

''' % (dbname,
username))
params = {'action' : 'query',
'prop' : 'links',
return [u'%s' % unicode(row[0].replace('_', ' '), 'utf-8') for row in cursor.fetchall()]
'titles' : '|'.join(['Wikipedia:Bots/Opt-out',
'Wikipedia:Bots/Opt-out/%s' % username]),
'pllimit' : 500,
'plnamespace' : 2}
req = wikitools.api.APIRequest(wiki, params)
response = req.query()
pages = response[u'query'][u'pages']
for k,v in pages.iteritems():
try:
users = v[u'links']
for entry in users:
excluded_users.append(entry[u'title'].split(':', 1)[1])
except KeyError:
pass
return excluded_users


def retrieve_config(page):
def retrieve_config(page):
Line 113: Line 90:
spam_page_text_parts = re.search(r'''\<source lang="text" enclose="div">
spam_page_text_parts = re.search(r'''\<source lang="text" enclose="div">
# KEY(.+)
# KEY(.+)
# RECIPIENTS \(CATEGORY\)(.+)
# RECIPIENTS \(PAGE LIST\)(.+)
# RECIPIENTS \(PAGE LIST\)(.+)
# SUBJECT(.+)
# SUBJECT(.+)
Line 119: Line 95:
</source>''', spam_page_text, re.I|re.U|re.M|re.DOTALL)
</source>''', spam_page_text, re.I|re.U|re.M|re.DOTALL)
return { 'key': spam_page_text_parts.group(1),
return { 'key': spam_page_text_parts.group(1),
'recip_cat': spam_page_text_parts.group(2),
'recip_page': spam_page_text_parts.group(2),
'recip_page': spam_page_text_parts.group(3),
'subject': spam_page_text_parts.group(3),
'subject': spam_page_text_parts.group(4),
'body': spam_page_text_parts.group(4) }
'body': spam_page_text_parts.group(5),
}


def read_keys():
def read_keys():
f = open('/home/mzmcbride/scripts/deliverybot/deliverybot-keys.txt', 'r')
f = open(directory+'deliverybot-keys.txt', 'r')
keys = f.read().strip('\n').split('\n')
keys = f.read().strip('\n').split('\n')
f.close()
f.close()
Line 132: Line 106:


def add_key(key):
def add_key(key):
f = open('/home/mzmcbride/scripts/deliverybot/deliverybot-keys.txt', 'a')
f = open(directory+'deliverybot-keys.txt', 'a')
f.write('%s\n' % key)
f.write('%s\n' % key)
f.close()
f.close()
return
return


def page_list_targets(cursor, dbname, full_page_title):
def page_list_targets(wiki, full_page_title):
page_list_targets = set()
input_page_namespace = full_page_title.split(':')[0]
input_page_namespace_clean = re.sub('_', ' ', input_page_namespace)
input_page_title = full_page_title.split(':')[1]
input_page_title_clean = re.sub(' ', '_', input_page_title)
cursor.execute('''
/* deliverybot-2.py SLOW_OK */
SELECT DISTINCT
pl_title
FROM page
JOIN pagelinks
ON pl_from = page_id
JOIn toolserver.namespace
ON dbname = %s
AND page_namespace = ns_id
WHERE pl_namespace IN (2,3)
AND ns_name = %s
AND page_title = %s
ORDER BY pl_title ASC;
''' , (dbname, input_page_namespace_clean, input_page_title_clean))
return [u'%s' % unicode(row[0], 'utf-8') for row in cursor.fetchall()]


params = {'action' : 'query',
def category_targets(cursor, category):
'prop' : 'links',
# FIXME: Doesn't support "Foo"; only supports "Category:Foo"
'titles' : full_page_title,
input_cat_title = category.split(':')[1]
'pllimit' : 500,
input_cat_title_clean = re.sub(' ', '_', input_cat_title)
'plnamespace' : '2|3'}
cursor.execute('''
req = wikitools.api.APIRequest(wiki, params)
/* deliverybot-2.py SLOW_OK */
response = req.query()
SELECT DISTINCT
users = response[u'query'][u'pages'].popitem()[1][u'links']
SUBSTRING_INDEX(page_title, '/', 1) AS page_title
FROM page
for entry in users:
page_list_targets.add(entry[u'title'].split(':', 1)[1])
JOIN categorylinks
print page_list_targets
ON cl_from = page_id
return page_list_targets
WHERE cl_to = %s
AND page_namespace IN (2,3);
''' , (input_cat_title_clean))
return [u'%s' % unicode(row[0], 'utf-8') for row in cursor.fetchall()]


def edit_talk_page(user_talk):
def edit_talk_page(user_talk):
Line 217: Line 169:


# Start actually doing something
# Start actually doing something
status = check_status(status_page)
try:
conn = MySQLdb.connect(host=settings.host,
db=settings.dbname,
read_default_file='~/.my.cnf')
cursor = conn.cursor()
replag = check_replag(cursor)
status = check_status(status_page)
except MySQLdb.OperationalError:
replag = 500
status = 'locked out'


log = codecs.open('/home/mzmcbride/scripts/deliverybot/deliverybot-log.txt', 'a', 'utf-8')
log = codecs.open(directory+'deliverybot-log.txt', 'a', 'utf-8')
if status == 'locked out':
log.write('locked out\n')


if replag < 300 or status == 'really start':
if status in ('start', 'run', 'really start', 'restart'):
log_in()
#log.write('lag is fine, let\'s edit\n')
top_user = top_user(wiki, spam)
if status == 'start' or status == 'really start':
trusted_users = trusted_users(wiki, access_list)
log_in()
excluded_users = excluded_users(wiki, username)
top_user = top_user(cursor, dbname, user_namespace, spam)
old_keys = read_keys()
trusted_users = trusted_users(cursor, dbname, user_namespace, access_list)
configuration = retrieve_config(spam)
excluded_users = excluded_users(cursor, dbname, username)
current_key = strip_cruft(configuration['key'])
old_keys = read_keys()
input_page = strip_cruft(configuration['recip_page'])
configuration = retrieve_config(spam)
current_key = strip_cruft(configuration['key'])
subject_line = strip_cruft(configuration['subject'])
input_cat = strip_cruft(configuration['recip_cat'])
body_text = strip_cruft(configuration['body'])
body_text_final = body_text + '\n<!-- %s %s -->' % (username, current_key)
input_page = strip_cruft(configuration['recip_page'])
subject_line = strip_cruft(configuration['subject'])
body_text = strip_cruft(configuration['body'])
body_text_final = body_text + '\n<!-- %s %s -->' % (username, current_key)


log.write('status is fine, let\'s edit\n')
log.write('status is fine, let\'s edit\n')
if top_user in trusted_users:
if top_user in trusted_users:
log.write('auth is fine, let\'s edit\n')
log.write('auth is fine, let\'s edit\n')
if current_key not in old_keys or status == 'really start':
if current_key not in old_keys or status in ('really start', 'restart'):
log.write('key is fine, let\'s edit\n')
log.write('key is fine, let\'s edit\n')
if len(subject_line) > 245:
if len(subject_line) > 245:
log.write('subject line is too large, edit status page indicating so and die\n')
log.write('subject line is too large, edit status page indicating so and die\n')
change_status('Error: Subject line is too long')
change_status('Error: Subject line is too long')
kill_self()
kill_self()
else:
if input_cat != '' and input_page != '':
log.write('error, both inputs defined, edit status page and die\n')
change_status('Error: Input list is ambiguous; use a page list \'\'or\'\' a category')
kill_self()
elif input_cat != '':
log.write('we\'ll use the category input by default\n')
change_status('Running')
for target in category_targets(cursor, input_cat):
# Edit here!
edit_talk_page(target)
add_key(current_key)
change_status('Completed run successfully')
elif input_page != '':
log.write('use the page list\n')
change_status('Running')
for target in page_list_targets(cursor, dbname, input_page):
# Edit here!
edit_talk_page(target)
add_key(current_key)
change_status('Completed run successfully')
else:
log.write('error, neither input defined, edit status page and die\n')
change_status('Error: No valid target list could be found')
kill_self()
else:
else:
log.write('key is old, edit status page to indicate such and die\n')
if input_page != '':
change_status('Error: Key is invalid')
log.write('use the page list\n')
kill_self()
change_status('Running')
for target in page_list_targets(wiki, input_page):
# Edit here!
edit_talk_page(target)
add_key(current_key)
change_status('Completed run successfully')
else:
else:
log.write('user not authorized to use bot; edit status page to indicate such and die\n')
log.write('key is old, edit status page to indicate such and die\n')
change_status('Error: User [[%s|not authorized]] to use bot' % access_list)
change_status('Error: Key is invalid')
kill_self()
kill_self()
else:
else:
log.write('user not authorized to use bot; edit status page to indicate such and die\n')
pass
change_status('Error: User [[%s|not authorized]] to use bot' % access_list)

else:
if status == 'start':
log.write('will edit status page here to indicate high lag and will die\n')
log_in()
change_status('Error: [[tswiki:Replag|Replication lag]] is too high currently')
kill_self()
kill_self()
elif status != 'locked out':
log.write('will just die; replag too high\n')


if status != 'locked out':
cursor.close()
conn.close()
log.close()
log.close()
</syntaxhighlight>
</syntaxhighlight>

Revision as of 22:58, 16 November 2012

deliverybot-2.py

#! /usr/bin/env python
# Public domain; MZMcBride, bjweeks; 2011

import codecs
import os
import re
import time
import wikitools
import settings

# Define variables
# Module-level configuration; every value is derived from the local
# ``settings`` module.
# ``directory`` is used as a plain string prefix for the key and log file
# names (the file name is appended with ``+``), so it presumably has to end
# with a path separator -- confirm against settings.
directory = settings.directory
username = settings.username
user_namespace = settings.user_namespace
# Titles of the bot's on-wiki pages, each of the form
# "<user_namespace>:<username>/<subpage from settings>".
access_list = user_namespace + ':' + username + '/' + settings.access_list
# NOTE: ``log`` is a page title here, but the script body at the bottom of
# the file rebinds the same name to the handle of the local log file, and the
# functions that call ``log.write`` rely on that later binding.
log = user_namespace + ':' + username + '/' + settings.log
status_page = user_namespace + ':' + username + '/' + settings.status
spam = user_namespace + ':' + username + '/' + settings.spam
# Shared API session used by every function below.
# NOTE(review): setMaxlag(-1) presumably turns off wikitools' maxlag
# handling -- confirm against the wikitools documentation.
wiki = wikitools.Wiki(settings.apiurl); wiki.setMaxlag(-1)

# Functions to do various tasks
def log_in():
    """Log the shared ``wiki`` session in as the bot account.

    The user name and password are taken from the ``settings`` module.
    Returns None.
    """
    wiki.login(
        settings.username,
        settings.password,
    )

def kill_self():
    """Terminate the process immediately with exit status 0.

    This goes through ``os._exit``, which ends the process without running
    the interpreter's normal shutdown steps, so no code after a call to this
    function is executed.
    """
    exit_status = 0
    os._exit(exit_status)

def check_status(page):
    """Return the lower-cased wikitext of the status page.

    page -- full title of the page that holds the bot's status.

    The page is read through the shared ``wiki`` session without following
    redirects.  Any exception raised by wikitools while fetching the text
    propagates to the caller.
    """
    # Read the page that was actually asked for; the previous code ignored
    # the argument and always read the module-level ``status_page``.
    status_page_obj = wikitools.Page(wiki, page, followRedir=False)
    return status_page_obj.getWikiText().lower()

def change_status(status_message):
    """Overwrite the bot's status page with *status_message* and log it.

    status_message -- new wikitext for the page named by the module-level
    ``status_page``.

    The edit is made first (flagged as a bot edit, redirects not followed);
    the log line is written afterwards to the module-level ``log``.
    """
    page = wikitools.Page(wiki, status_page, followRedir=False)
    page.edit(
        status_message,
        summary='[[WP:BOT|Bot]]: Updated status.',
        bot=1,
    )
    log_line = 'will edit with content: %s\n' % status_message
    log.write(log_line)

def top_user(wiki, page):
    """Return the user name attached to the first revision the API reports.

    wiki -- wikitools Wiki object to query.
    page -- full title of the page to inspect.

    The query asks for ``prop=revisions`` with only the ``user`` property
    and no limit or direction, so the first entry is expected to be the
    latest revision (MediaWiki API default -- confirm if this matters).
    Raises KeyError if the response carries no revisions for the page.
    """
    params = {
        'action': 'query',
        'prop': 'revisions',
        'rvprop': 'user',
        'titles': page,
    }
    request = wikitools.api.APIRequest(wiki, params)
    result = request.query()
    # Only one title was requested, so take whichever single page entry
    # came back; its key (the page id) is not needed.
    page_id, page_info = result[u'query'][u'pages'].popitem()
    first_revision = page_info[u'revisions'][0]
    return first_revision[u'user']

def trusted_users(wiki, page):
    """Return the user names linked from the access-list page.

    wiki -- wikitools Wiki object to query.
    page -- full title of the page whose links define the trusted users.

    Only links into namespace 2 are requested.  Each returned name is the
    link title with everything up to the first colon removed.  An empty list
    is returned when the page entry has no ``links`` key, so that an access
    list without user links means "nobody is trusted" rather than a crash
    (excluded_users already tolerates the same missing key).
    """
    params = {'action'      : 'query',
              'prop'        : 'links',
              'titles'      : page,
              'pllimit'     : 500,
              'plnamespace' : 2}
    req = wikitools.api.APIRequest(wiki, params)
    response = req.query()
    # Only one title was requested, so there is a single page entry.
    page_info = response[u'query'][u'pages'].popitem()[1]
    links = page_info.get(u'links', [])
    return [entry[u'title'].split(':', 1)[1] for entry in links]

def excluded_users(wiki, username):
    """Return the user names linked from the bot opt-out pages.

    wiki     -- wikitools Wiki object to query.
    username -- bot account name; selects the bot-specific opt-out subpage.

    Two pages are queried in one request: ``Wikipedia:Bots/Opt-out`` and
    ``Wikipedia:Bots/Opt-out/<username>``.  Only links into namespace 2 are
    requested.  Each returned name is the link title with everything up to
    the first colon removed.
    """
    opt_out_titles = ['Wikipedia:Bots/Opt-out',
                      'Wikipedia:Bots/Opt-out/%s' % username]
    params = {
        'action': 'query',
        'prop': 'links',
        'titles': '|'.join(opt_out_titles),
        'pllimit': 500,
        'plnamespace': 2,
    }
    request = wikitools.api.APIRequest(wiki, params)
    pages = request.query()[u'query'][u'pages']

    opted_out = []
    for page_info in pages.itervalues():
        try:
            for link in page_info[u'links']:
                opted_out.append(link[u'title'].split(':', 1)[1])
        except KeyError:
            # A page entry without the expected keys contributes nothing
            # further; move on to the next page.
            continue
    return opted_out

def retrieve_config(page):
    """Parse the delivery configuration out of a wiki page.

    page -- full title of the page that holds the configuration.

    The page text must contain a ``<source lang="text" enclose="div">``
    block with the headings ``# KEY``, ``# RECIPIENTS (PAGE LIST)``,
    ``# SUBJECT`` and ``# BODY``, each on its own line and in that order.
    The text following each heading is returned unstripped in a dict with
    the keys 'key', 'recip_page', 'subject' and 'body'.

    If the page text does not match that layout, ``re.search`` returns None
    and the ``.group`` call raises AttributeError.
    """
    # Read the page that was actually asked for; the previous code ignored
    # the argument and always read the module-level ``spam``.
    spam_page = wikitools.Page(wiki, page, followRedir=False)
    spam_page_text = spam_page.getWikiText()
    # The pattern is a multi-line raw string, so its heading lines must stay
    # at column 0: any indentation would become part of the pattern.
    spam_page_text_parts = re.search(r'''\<source lang="text" enclose="div">
# KEY(.+)
# RECIPIENTS \(PAGE LIST\)(.+)
# SUBJECT(.+)
# BODY(.+)
</source>''', spam_page_text, re.I|re.U|re.M|re.DOTALL)
    return { 'key': spam_page_text_parts.group(1),
             'recip_page': spam_page_text_parts.group(2),
             'subject': spam_page_text_parts.group(3),
             'body': spam_page_text_parts.group(4) }

def read_keys():
    """Return the list of keys recorded in the local key file.

    The file is ``deliverybot-keys.txt`` under the module-level
    ``directory`` prefix, with one key per line.  Leading and trailing
    newlines are dropped before splitting, so an empty file yields ``['']``.
    Raises IOError if the file cannot be opened.
    """
    # ``with`` guarantees the handle is closed even if the read fails.
    with open(directory+'deliverybot-keys.txt', 'r') as f:
        return f.read().strip('\n').split('\n')

def add_key(key):
    """Append *key* on its own line to the local key file.

    key -- key string to record.

    The file is ``deliverybot-keys.txt`` under the module-level
    ``directory`` prefix; it is opened in append mode.  Returns None.
    """
    # ``with`` guarantees the handle is closed even if the write fails.
    with open(directory+'deliverybot-keys.txt', 'a') as f:
        f.write('%s\n' % key)

def page_list_targets(wiki, full_page_title):
    """Return the set of recipients linked from the recipient-list page.

    wiki            -- wikitools Wiki object to query.
    full_page_title -- full title of the page listing the recipients.

    Links into namespaces 2 and 3 are requested.  Each element of the
    returned set is the link title with everything up to the first colon
    removed, so a user linked through both namespaces appears once.
    Raises KeyError if the page entry carries no ``links`` key.
    """
    params = {'action'      : 'query',
              'prop'        : 'links',
              'titles'      : full_page_title,
              'pllimit'     : 500,
              'plnamespace' : '2|3'}
    req = wikitools.api.APIRequest(wiki, params)
    response = req.query()
    # Only one title was requested, so there is a single page entry.
    links = response[u'query'][u'pages'].popitem()[1][u'links']
    # The leftover debug ``print`` of the whole set was removed; it wrote to
    # stdout on every run.
    return set(entry[u'title'].split(':', 1)[1] for entry in links)

def _attempt_delivery(talk_page, recipient):
    """Make one attempt at delivering the current message to *recipient*.

    talk_page -- wikitools Page object for the recipient's talk page.
    recipient -- user name, used for the opt-out check and the log lines.

    Logs exactly one of 'Excluded user', 'Edited' or 'Skipped'.  Errors from
    the edit or the log write propagate to the caller.
    """
    try:
        page_text = talk_page.getWikiText()
    except Exception:
        # Fetching the text failed; treat the page as empty so the message
        # is still delivered as a new section.
        page_text = ''
    # The marker is literal text appended to every delivered message, so
    # escape it before using it as a pattern: regex metacharacters in the
    # key or user name must not change what counts as "already delivered".
    marker = r'(<!-- %s %s -->)' % (re.escape(username), re.escape(current_key))
    if recipient in excluded_users:
        log.write('Excluded user: %s\n' % recipient)
    elif not re.search(marker, page_text, re.I|re.U):
        talk_page.edit(text=body_text_final, summary=subject_line, section='new', bot=1, skipmd5=True)
        log.write('Edited: %s\n' % recipient)
    else:
        log.write('Skipped: %s\n' % recipient)


def edit_talk_page(user_talk):
    """Deliver the current message to one user's talk page, retrying once.

    user_talk -- name of the recipient, without a namespace prefix; the page
    edited is ``User talk:<user_talk>`` (redirects are followed).

    Reads these module-level names set up by the script body: ``wiki``,
    ``log``, ``username``, ``current_key``, ``excluded_users``,
    ``body_text_final`` and ``subject_line``.

    If the first attempt raises, the function sleeps two seconds and tries
    once more.  'WTF1' is logged when the retry also fails, and 'WTF2' is
    logged after any retry, whether it succeeded or not (unchanged from the
    previous behaviour).  No exception from either attempt escapes.
    """
    # Use the argument rather than the caller's module-level loop variable
    # ``target``, which the previous code silently depended on.
    recipient = user_talk
    talk_page = wikitools.Page(wiki, 'User talk:%s' % recipient, followRedir=True)
    try:
        _attempt_delivery(talk_page, recipient)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        time.sleep(2)
        try:
            _attempt_delivery(talk_page, recipient)
        except Exception:
            log.write('WTF1 %s\n' % recipient)
        log.write('WTF2 %s\n' % recipient)
    return

def strip_cruft(str):
    """Return *str* with leading and trailing whitespace removed.

    str -- text to clean (the parameter name is kept for compatibility even
    though it shadows the builtin).

    Whitespace inside the text is left untouched.
    """
    # ``\s`` already matches newlines, so the former ``(\s|\n)*`` alternation
    # added nothing; where the regex engine keeps it as a real alternation
    # it can backtrack exponentially on a long run of newlines inside the
    # text.
    text = re.sub(r'^\s*', '', str)
    return re.sub(r'\s*$', '', text)

# Start actually doing something
# Script body: read the on-wiki status page and, if it requests a run,
# validate the request and deliver the message to every listed recipient.
# The status is fetched before logging in and before the log file is opened.
status = check_status(status_page)

# Rebinds ``log`` from the page title assigned in the configuration section
# to the handle of the local append-mode, UTF-8 log file; the functions that
# call ``log.write`` rely on this binding.
log = codecs.open(directory+'deliverybot-log.txt', 'a', 'utf-8')

# Any other status text means there is nothing to do: the script falls
# straight through to ``log.close()``.
if status in ('start', 'run', 'really start', 'restart'):
    log_in()
    # NOTE: the next three assignments rebind the function names to their
    # results, so the functions can no longer be called after this point.
    top_user = top_user(wiki, spam)
    trusted_users = trusted_users(wiki, access_list)
    excluded_users = excluded_users(wiki, username)
    old_keys = read_keys()
    configuration = retrieve_config(spam)
    current_key = strip_cruft(configuration['key'])
    input_page = strip_cruft(configuration['recip_page'])
    subject_line = strip_cruft(configuration['subject'])
    body_text = strip_cruft(configuration['body'])
    # The trailing HTML comment is the marker edit_talk_page searches for to
    # detect that a talk page already received this message.
    body_text_final = body_text + '\n<!-- %s %s -->' % (username, current_key)

    log.write('status is fine, let\'s edit\n')
    # Authorization: the user returned by top_user() for the configuration
    # page must appear in the access list.
    if top_user in trusted_users:
        log.write('auth is fine, let\'s edit\n')
        # A key that was already recorded is rejected unless the status is
        # 'really start' or 'restart', which bypass the check.
        if current_key not in old_keys or status in ('really start', 'restart'):
            log.write('key is fine, let\'s edit\n')
            # NOTE(review): 245 is presumably chosen to fit the edit-summary
            # length limit (the subject is used as the summary) -- confirm.
            if len(subject_line) > 245:
                log.write('subject line is too large, edit status page indicating so and die\n')
                change_status('Error: Subject line is too long')
                kill_self()
            else:
                # NOTE: there is no ``else`` for an empty recipient page; in
                # that case nothing is delivered and the status page is left
                # unchanged.
                if input_page != '':
                    log.write('use the page list\n')
                    change_status('Running')
                    # The loop variable must be the module-level name
                    # ``target``: edit_talk_page reads that global rather
                    # than its own parameter.
                    for target in page_list_targets(wiki, input_page):
                        # Edit here!
                        edit_talk_page(target)
                    # The key is recorded only after the whole list has been
                    # processed.
                    add_key(current_key)
                    change_status('Completed run successfully')
        else:
            log.write('key is old, edit status page to indicate such and die\n')
            change_status('Error: Key is invalid')
            kill_self()
    else:
        log.write('user not authorized to use bot; edit status page to indicate such and die\n')
        change_status('Error: User [[%s|not authorized]] to use bot' % access_list)
        kill_self()

# Not reached on the error paths above: kill_self() ends the process via
# os._exit before control gets here.
log.close()

crontab

0,5,10,15,20,25,30,35,40,45,50,55 * * * * PYTHONPATH=$HOME/scripts python $HOME/scripts/deliverybot/deliverybot-2.py