User:EdwardsBot/Configuration: Difference between revisions
Appearance
Content deleted Content added
→deliverybot-2.py: updated section |
→deliverybot-2.py: updated section |
||
Line 5: | Line 5: | ||
import codecs |
import codecs |
||
import MySQLdb |
|||
import os |
import os |
||
import re |
import re |
||
Line 13: | Line 12: | ||
# Define variables |
# Define variables |
||
directory = settings.directory |
|||
username = settings.username |
username = settings.username |
||
user_namespace = settings.user_namespace |
user_namespace = settings.user_namespace |
||
Line 29: | Line 28: | ||
def kill_self(): |
def kill_self(): |
||
os._exit(0) |
os._exit(0) |
||
def check_replag(cursor): |
|||
cursor.execute(''' |
|||
/* deliverybot-2.py SLOW_OK */ |
|||
SELECT |
|||
UNIX_TIMESTAMP() - UNIX_TIMESTAMP(rc_timestamp) |
|||
FROM recentchanges |
|||
ORDER BY rc_timestamp DESC |
|||
LIMIT 1; |
|||
''') |
|||
return cursor.fetchone()[0] |
|||
def check_status(page): |
def check_status(page): |
||
Line 51: | Line 39: | ||
return |
return |
||
def top_user( |
def top_user(wiki, page): |
||
params = {'action' : 'query', |
|||
cursor.execute(''' |
|||
'prop' : 'revisions', |
|||
/* deliverybot-2.py SLOW_OK */ |
|||
'rvprop' : 'user', |
|||
SELECT |
|||
'titles' : page} |
|||
rev_user_text |
|||
req = wikitools.api.APIRequest(wiki, params) |
|||
FROM revision |
|||
response = req.query() |
|||
JOIN page |
|||
latest_editor = response[u'query'][u'pages'].popitem()[1][u'revisions'][0][u'user'] |
|||
ON rev_page = page_id |
|||
return latest_editor |
|||
JOIN toolserver.namespace |
|||
ON dbname = %s |
|||
AND page_namespace = ns_id |
|||
WHERE ns_name = %s |
|||
AND page_title = %s |
|||
ORDER BY rev_timestamp DESC |
|||
LIMIT 1; |
|||
''' , (dbname, |
|||
re.sub('_', ' ', user_namespace), |
|||
re.sub(' ', '_', spam).lstrip(user_namespace + ':'))) |
|||
return u'%s' % unicode(cursor.fetchone()[0].replace(' ', '_'), 'utf-8') |
|||
def trusted_users( |
def trusted_users(wiki, page): |
||
trusted_users = [] |
|||
cursor.execute(''' |
|||
/* deliverybot-2.py SLOW_OK */ |
|||
SELECT DISTINCT |
|||
pl_title |
|||
FROM pagelinks |
|||
JOIN page |
|||
ON pl_from = page_id |
|||
JOIN toolserver.namespace |
|||
ON dbname = %s |
|||
AND page_namespace = ns_id |
|||
WHERE ns_name = %s |
|||
AND page_title = %s |
|||
AND pl_namespace IN (2,3); |
|||
''' , (dbname, |
|||
re.sub('_', ' ', user_namespace), |
|||
re.sub(' ', '_', access_list).lstrip(user_namespace + ':'))) |
|||
return [u'%s' % unicode(row[0], 'utf-8') for row in cursor.fetchall()] |
|||
params = {'action' : 'query', |
|||
def excluded_users(cursor, dbname, username): |
|||
'prop' : 'links', |
|||
cursor.execute(''' |
|||
'titles' : page, |
|||
/* deliverybot-2.py SLOW_OK */ |
|||
'pllimit' : 500, |
|||
SELECT DISTINCT |
|||
'plnamespace' : 2} |
|||
pl_title |
|||
req = wikitools.api.APIRequest(wiki, params) |
|||
FROM pagelinks |
|||
response = req.query() |
|||
JOIN page |
|||
users = response[u'query'][u'pages'].popitem()[1][u'links'] |
|||
ON pl_from = page_id |
|||
for entry in users: |
|||
JOIN toolserver.namespace |
|||
trusted_users.append(entry[u'title'].split(':', 1)[1]) |
|||
ON dbname = '%s' |
|||
return trusted_users |
|||
AND page_namespace = ns_id |
|||
WHERE ns_name = 'Wikipedia' |
|||
def excluded_users(wiki, username): |
|||
AND page_title IN ('Bots/Opt-out', 'Bots/Opt-out/%s') |
|||
excluded_users = [] |
|||
AND pl_namespace IN (2,3); |
|||
''' % (dbname, |
|||
params = {'action' : 'query', |
|||
'prop' : 'links', |
|||
return [u'%s' % unicode(row[0].replace('_', ' '), 'utf-8') for row in cursor.fetchall()] |
|||
'titles' : '|'.join(['Wikipedia:Bots/Opt-out', |
|||
'Wikipedia:Bots/Opt-out/%s' % username]), |
|||
'pllimit' : 500, |
|||
'plnamespace' : 2} |
|||
req = wikitools.api.APIRequest(wiki, params) |
|||
response = req.query() |
|||
pages = response[u'query'][u'pages'] |
|||
for k,v in pages.iteritems(): |
|||
try: |
|||
users = v[u'links'] |
|||
for entry in users: |
|||
excluded_users.append(entry[u'title'].split(':', 1)[1]) |
|||
except KeyError: |
|||
pass |
|||
return excluded_users |
|||
def retrieve_config(page): |
def retrieve_config(page): |
||
Line 113: | Line 90: | ||
spam_page_text_parts = re.search(r'''\<source lang="text" enclose="div"> |
spam_page_text_parts = re.search(r'''\<source lang="text" enclose="div"> |
||
# KEY(.+) |
# KEY(.+) |
||
# RECIPIENTS \(CATEGORY\)(.+) |
|||
# RECIPIENTS \(PAGE LIST\)(.+) |
# RECIPIENTS \(PAGE LIST\)(.+) |
||
# SUBJECT(.+) |
# SUBJECT(.+) |
||
Line 119: | Line 95: | ||
</source>''', spam_page_text, re.I|re.U|re.M|re.DOTALL) |
</source>''', spam_page_text, re.I|re.U|re.M|re.DOTALL) |
||
return { 'key': spam_page_text_parts.group(1), |
return { 'key': spam_page_text_parts.group(1), |
||
' |
'recip_page': spam_page_text_parts.group(2), |
||
' |
'subject': spam_page_text_parts.group(3), |
||
' |
'body': spam_page_text_parts.group(4) } |
||
'body': spam_page_text_parts.group(5), |
|||
} |
|||
def read_keys(): |
def read_keys(): |
||
f = open(' |
f = open(directory+'deliverybot-keys.txt', 'r') |
||
keys = f.read().strip('\n').split('\n') |
keys = f.read().strip('\n').split('\n') |
||
f.close() |
f.close() |
||
Line 132: | Line 106: | ||
def add_key(key): |
def add_key(key): |
||
f = open(' |
f = open(directory+'deliverybot-keys.txt', 'a') |
||
f.write('%s\n' % key) |
f.write('%s\n' % key) |
||
f.close() |
f.close() |
||
return |
return |
||
def page_list_targets( |
def page_list_targets(wiki, full_page_title): |
||
page_list_targets = set() |
|||
input_page_namespace = full_page_title.split(':')[0] |
|||
input_page_namespace_clean = re.sub('_', ' ', input_page_namespace) |
|||
input_page_title = full_page_title.split(':')[1] |
|||
input_page_title_clean = re.sub(' ', '_', input_page_title) |
|||
cursor.execute(''' |
|||
/* deliverybot-2.py SLOW_OK */ |
|||
SELECT DISTINCT |
|||
pl_title |
|||
FROM page |
|||
JOIN pagelinks |
|||
ON pl_from = page_id |
|||
JOIn toolserver.namespace |
|||
ON dbname = %s |
|||
AND page_namespace = ns_id |
|||
WHERE pl_namespace IN (2,3) |
|||
AND ns_name = %s |
|||
AND page_title = %s |
|||
ORDER BY pl_title ASC; |
|||
''' , (dbname, input_page_namespace_clean, input_page_title_clean)) |
|||
return [u'%s' % unicode(row[0], 'utf-8') for row in cursor.fetchall()] |
|||
params = {'action' : 'query', |
|||
def category_targets(cursor, category): |
|||
'prop' : 'links', |
|||
# FIXME: Doesn't support "Foo"; only supports "Category:Foo" |
|||
'titles' : full_page_title, |
|||
input_cat_title = category.split(':')[1] |
|||
'pllimit' : 500, |
|||
input_cat_title_clean = re.sub(' ', '_', input_cat_title) |
|||
'plnamespace' : '2|3'} |
|||
cursor.execute(''' |
|||
req = wikitools.api.APIRequest(wiki, params) |
|||
/* deliverybot-2.py SLOW_OK */ |
|||
response = req.query() |
|||
SELECT DISTINCT |
|||
users = response[u'query'][u'pages'].popitem()[1][u'links'] |
|||
SUBSTRING_INDEX(page_title, '/', 1) AS page_title |
|||
for entry in users: |
|||
page_list_targets.add(entry[u'title'].split(':', 1)[1]) |
|||
JOIN categorylinks |
|||
print page_list_targets |
|||
ON cl_from = page_id |
|||
return page_list_targets |
|||
WHERE cl_to = %s |
|||
AND page_namespace IN (2,3); |
|||
''' , (input_cat_title_clean)) |
|||
return [u'%s' % unicode(row[0], 'utf-8') for row in cursor.fetchall()] |
|||
def edit_talk_page(user_talk): |
def edit_talk_page(user_talk): |
||
Line 217: | Line 169: | ||
# Start actually doing something |
# Start actually doing something |
||
status = check_status(status_page) |
|||
try: |
|||
conn = MySQLdb.connect(host=settings.host, |
|||
db=settings.dbname, |
|||
read_default_file='~/.my.cnf') |
|||
cursor = conn.cursor() |
|||
replag = check_replag(cursor) |
|||
status = check_status(status_page) |
|||
except MySQLdb.OperationalError: |
|||
replag = 500 |
|||
status = 'locked out' |
|||
log = codecs.open(' |
log = codecs.open(directory+'deliverybot-log.txt', 'a', 'utf-8') |
||
if status == 'locked out': |
|||
log.write('locked out\n') |
|||
if |
if status in ('start', 'run', 'really start', 'restart'): |
||
log_in() |
|||
#log.write('lag is fine, let\'s edit\n') |
|||
top_user = top_user(wiki, spam) |
|||
if status == 'start' or status == 'really start': |
|||
trusted_users = trusted_users(wiki, access_list) |
|||
log_in() |
|||
excluded_users = excluded_users(wiki, username) |
|||
top_user = top_user(cursor, dbname, user_namespace, spam) |
|||
old_keys = read_keys() |
|||
trusted_users = trusted_users(cursor, dbname, user_namespace, access_list) |
|||
configuration = retrieve_config(spam) |
|||
excluded_users = excluded_users(cursor, dbname, username) |
|||
current_key = strip_cruft(configuration['key']) |
|||
old_keys = read_keys() |
|||
input_page = strip_cruft(configuration['recip_page']) |
|||
configuration = retrieve_config(spam) |
|||
subject_line = strip_cruft(configuration['subject']) |
|||
body_text = strip_cruft(configuration['body']) |
|||
body_text_final = body_text + '\n<!-- %s %s -->' % (username, current_key) |
|||
input_page = strip_cruft(configuration['recip_page']) |
|||
subject_line = strip_cruft(configuration['subject']) |
|||
body_text = strip_cruft(configuration['body']) |
|||
body_text_final = body_text + '\n<!-- %s %s -->' % (username, current_key) |
|||
log.write('status is fine, let\'s edit\n') |
|||
if top_user in trusted_users: |
|||
log.write('auth is fine, let\'s edit\n') |
|||
if current_key not in old_keys or status in ('really start', 'restart'): |
|||
log.write('key is fine, let\'s edit\n') |
|||
if len(subject_line) > 245: |
|||
log.write('subject line is too large, edit status page indicating so and die\n') |
|||
change_status('Error: Subject line is too long') |
|||
kill_self() |
|||
else: |
|||
if input_cat != '' and input_page != '': |
|||
log.write('error, both inputs defined, edit status page and die\n') |
|||
change_status('Error: Input list is ambiguous; use a page list \'\'or\'\' a category') |
|||
kill_self() |
|||
elif input_cat != '': |
|||
log.write('we\'ll use the category input by default\n') |
|||
change_status('Running') |
|||
for target in category_targets(cursor, input_cat): |
|||
# Edit here! |
|||
edit_talk_page(target) |
|||
add_key(current_key) |
|||
change_status('Completed run successfully') |
|||
elif input_page != '': |
|||
log.write('use the page list\n') |
|||
change_status('Running') |
|||
for target in page_list_targets(cursor, dbname, input_page): |
|||
# Edit here! |
|||
edit_talk_page(target) |
|||
add_key(current_key) |
|||
change_status('Completed run successfully') |
|||
else: |
|||
log.write('error, neither input defined, edit status page and die\n') |
|||
change_status('Error: No valid target list could be found') |
|||
kill_self() |
|||
else: |
else: |
||
if input_page != '': |
|||
log.write('use the page list\n') |
|||
change_status('Running') |
|||
for target in page_list_targets(wiki, input_page): |
|||
# Edit here! |
|||
edit_talk_page(target) |
|||
add_key(current_key) |
|||
change_status('Completed run successfully') |
|||
else: |
else: |
||
log.write(' |
log.write('key is old, edit status page to indicate such and die\n') |
||
change_status('Error: |
change_status('Error: Key is invalid') |
||
kill_self() |
kill_self() |
||
else: |
else: |
||
log.write('user not authorized to use bot; edit status page to indicate such and die\n') |
|||
pass |
|||
change_status('Error: User [[%s|not authorized]] to use bot' % access_list) |
|||
else: |
|||
if status == 'start': |
|||
log.write('will edit status page here to indicate high lag and will die\n') |
|||
log_in() |
|||
change_status('Error: [[tswiki:Replag|Replication lag]] is too high currently') |
|||
kill_self() |
kill_self() |
||
elif status != 'locked out': |
|||
log.write('will just die; replag too high\n') |
|||
if status != 'locked out': |
|||
cursor.close() |
|||
conn.close() |
|||
log.close() |
log.close() |
||
</syntaxhighlight> |
</syntaxhighlight> |
Revision as of 22:58, 16 November 2012
deliverybot-2.py
#! /usr/bin/env python
# Public domain; MZMcBride, bjweeks; 2011
import codecs
import os
import re
import time
import wikitools
import settings
# Define variables
# Values copied from the local ``settings`` module.
directory = settings.directory
username = settings.username
user_namespace = settings.user_namespace

# All control pages share the "<user_namespace>:<username>/" title prefix.
_subpage_prefix = user_namespace + ':' + username + '/'
access_list = _subpage_prefix + settings.access_list
# NOTE: ``log`` holds a page title here; the main script later rebinds the
# same name to the open log file object.
log = _subpage_prefix + settings.log
status_page = _subpage_prefix + settings.status
spam = _subpage_prefix + settings.spam

# Shared API session used by every function below.
wiki = wikitools.Wiki(settings.apiurl)
wiki.setMaxlag(-1)
# Functions to do various tasks
def log_in():
    """Log the shared ``wiki`` session in with the credentials from ``settings``."""
    wiki.login(settings.username, settings.password)
def kill_self():
    """Terminate the process immediately with exit status 0.

    ``os._exit()`` bypasses normal interpreter shutdown, so buffered file
    output is not flushed. The callers write to ``log`` right before calling
    this function; close the log first so those lines reach the disk.
    """
    try:
        log.close()
    except (NameError, AttributeError):
        # ``log`` is not an open file yet (it starts out as a page-title
        # string), so there is nothing to flush.
        pass
    finally:
        # Exit no matter what happened while closing the log.
        os._exit(0)
def check_status(page):
    """Return the lower-cased wikitext of the status page titled *page*.

    Redirects are not followed. The original body ignored *page* and always
    read the module-level ``status_page``; the argument is now honoured (the
    script's only call passes ``status_page``, so its behaviour is unchanged).
    """
    status_page_obj = wikitools.Page(wiki, page, followRedir=False)
    return status_page_obj.getWikiText().lower()
def change_status(status_message):
    """Replace the content of the bot's status page and note it in the log.

    The page named by the module-level ``status_page`` is edited (redirects
    are not followed) so that it contains *status_message*; afterwards the
    message is written to ``log``.
    """
    page_obj = wikitools.Page(wiki, status_page, followRedir=False)
    page_obj.edit(
        status_message,
        summary='[[WP:BOT|Bot]]: Updated status.',
        bot=1,
    )
    log.write('will edit with content: %s\n' % status_message)
def top_user(wiki, page):
    """Return the user name of the first revision the API reports for *page*.

    Issues a ``prop=revisions`` query that asks only for the ``user`` field
    and reads it from the first revision of the (single) returned page.
    Presumably this is the most recent editor -- the query sets no explicit
    revision limit or direction.
    """
    request = wikitools.api.APIRequest(wiki, {
        'action': 'query',
        'prop': 'revisions',
        'rvprop': 'user',
        'titles': page,
    })
    pages = request.query()[u'query'][u'pages']
    # Only one title was requested, so take whichever entry comes back.
    _, page_info = pages.popitem()
    return page_info[u'revisions'][0][u'user']
def trusted_users(wiki, page):
    """Return the user names linked from the access-list page *page*.

    Queries ``prop=links`` restricted to namespace 2 and strips the namespace
    prefix from every returned title (text after the first ``:``).

    Returns an empty list when the API response carries no ``links`` entry
    for the page, instead of raising ``KeyError``. ``excluded_users()``
    already tolerates that case; with an empty list the main script reports
    the user as not authorized rather than crashing.
    """
    params = {'action' : 'query',
              'prop' : 'links',
              'titles' : page,
              'pllimit' : 500,
              'plnamespace' : 2}
    req = wikitools.api.APIRequest(wiki, params)
    response = req.query()
    # Only one title was requested, so take whichever page entry comes back.
    page_info = response[u'query'][u'pages'].popitem()[1]
    links = page_info.get(u'links', [])
    return [entry[u'title'].split(':', 1)[1] for entry in links]
def excluded_users(wiki, username):
    """Collect the user names linked from the bot opt-out pages.

    Both ``Wikipedia:Bots/Opt-out`` and ``Wikipedia:Bots/Opt-out/<username>``
    are queried in a single ``prop=links`` request limited to namespace 2.
    Returns a list of titles with the namespace prefix removed.
    """
    opt_out_titles = ['Wikipedia:Bots/Opt-out',
                      'Wikipedia:Bots/Opt-out/%s' % username]
    request = wikitools.api.APIRequest(wiki, {
        'action': 'query',
        'prop': 'links',
        'titles': '|'.join(opt_out_titles),
        'pllimit': 500,
        'plnamespace': 2,
    })
    pages = request.query()[u'query'][u'pages']

    opted_out = []
    for page_info in pages.values():
        try:
            for link in page_info[u'links']:
                opted_out.append(link[u'title'].split(':', 1)[1])
        except KeyError:
            # No 'links' (or 'title') key for this entry: skip the rest of it.
            continue
    return opted_out
def retrieve_config(page):
    """Fetch the configuration page *page* and split it into its sections.

    The page text must contain a ``<source lang="text" enclose="div">`` block
    with ``# KEY``, ``# RECIPIENTS (PAGE LIST)``, ``# SUBJECT`` and ``# BODY``
    headings, in that order. Returns a dict with the raw (unstripped) text of
    each section under the keys ``key``, ``recip_page``, ``subject`` and
    ``body``.

    Raises ``ValueError`` when the page does not have that layout. The
    original body ignored *page* in favour of the module-level ``spam`` and
    failed with an ``AttributeError`` on ``None`` for a malformed page.
    """
    spam_page = wikitools.Page(wiki, page, followRedir=False)
    spam_page_text = spam_page.getWikiText()
    # The pattern spans several lines of the page; the line breaks inside the
    # literal are part of the expression.
    spam_page_text_parts = re.search(r'''\<source lang="text" enclose="div">
# KEY(.+)
# RECIPIENTS \(PAGE LIST\)(.+)
# SUBJECT(.+)
# BODY(.+)
</source>''', spam_page_text, re.I|re.U|re.M|re.DOTALL)
    if spam_page_text_parts is None:
        raise ValueError('Configuration page %s does not match the expected layout' % page)
    return { 'key': spam_page_text_parts.group(1),
             'recip_page': spam_page_text_parts.group(2),
             'subject': spam_page_text_parts.group(3),
             'body': spam_page_text_parts.group(4) }
def read_keys():
    """Return the list of keys stored in ``deliverybot-keys.txt``.

    The file lives under the module-level ``directory`` and holds one key per
    line. Leading and trailing newlines are dropped before splitting, so an
    empty file yields ``['']``.

    Errors opening or reading the file propagate on purpose: treating an
    unreadable key file as "no keys" would make every key look new.
    """
    # ``with`` closes the file even if read() raises.
    with open(directory+'deliverybot-keys.txt', 'r') as f:
        return f.read().strip('\n').split('\n')
def add_key(key):
    """Append *key* on its own line to ``deliverybot-keys.txt``.

    The file lives under the module-level ``directory`` and is opened in
    append mode.
    """
    # ``with`` closes the file even if write() raises.
    with open(directory+'deliverybot-keys.txt', 'a') as f:
        f.write('%s\n' % key)
    return
def page_list_targets(wiki, full_page_title):
    """Return the set of recipients linked from *full_page_title*.

    Queries ``prop=links`` restricted to namespaces 2 and 3 and strips the
    namespace prefix from each title (text after the first ``:``). Collecting
    into a set removes duplicate names.

    Raises ``KeyError`` if the API response has no ``links`` entry for the
    page (unchanged from the original). The leftover debug ``print`` of the
    result has been removed so the script no longer writes to stdout.
    """
    params = {'action' : 'query',
              'prop' : 'links',
              'titles' : full_page_title,
              'pllimit' : 500,
              'plnamespace' : '2|3'}
    req = wikitools.api.APIRequest(wiki, params)
    response = req.query()
    # Only one title was requested, so take whichever page entry comes back.
    links = response[u'query'][u'pages'].popitem()[1][u'links']
    return set(entry[u'title'].split(':', 1)[1] for entry in links)
def _deliver_once(talk_page, recipient):
    """Make a single delivery attempt to *talk_page* for *recipient*.

    Logs and returns without editing when the recipient is in
    ``excluded_users`` or the page already contains this run's marker
    comment. Exceptions raised by the edit propagate to the caller.
    """
    if recipient in excluded_users:
        log.write('Excluded user: %s\n' % recipient)
        return
    try:
        page_text = talk_page.getWikiText()
    except Exception:
        # Text could not be fetched; proceed as if the page were empty.
        page_text = ''
    # Escape both parts so a key containing regex metacharacters is matched
    # literally instead of being interpreted as a pattern.
    marker = r'(<!-- %s %s -->)' % (re.escape(username), re.escape(current_key))
    if re.search(marker, page_text, re.I|re.U):
        log.write('Skipped: %s\n' % recipient)
        return
    talk_page.edit(text=body_text_final, summary=subject_line, section='new', bot=1, skipmd5=True)
    log.write('Edited: %s\n' % recipient)


def edit_talk_page(user_talk):
    """Deliver the configured message to one user's talk page.

    *user_talk* is the bare user name; the page edited is
    ``User talk:<user_talk>`` with redirects followed. The message comes from
    the module-level ``body_text_final`` and ``subject_line``.

    If the first attempt raises, the function sleeps two seconds and tries
    once more; a second failure is logged as ``WTF1`` and swallowed.

    The original body ignored its argument and read the global loop variable
    ``target``; the argument is now used (the main loop passes that same
    value).
    """
    recipient = user_talk
    talk_page = wikitools.Page(wiki, 'User talk:%s' % recipient, followRedir=True)
    try:
        _deliver_once(talk_page, recipient)
    except Exception:
        time.sleep(2)
        try:
            _deliver_once(talk_page, recipient)
        except Exception:
            log.write('WTF1 %s\n' % recipient)
        # NOTE(review): the source's indentation was lost; this line is
        # assumed to belong to the retry path (written after every retry,
        # whether or not the retry succeeded) -- confirm against the
        # original file.
        log.write('WTF2 %s\n' % recipient)
    return
def strip_cruft(str):
    """Return *str* with leading and trailing whitespace removed.

    Interior whitespace is left untouched. The parameter keeps its original
    name (which shadows the builtin) so keyword callers are not broken.
    """
    # ``\n`` is already covered by ``\s``. One non-capturing pass replaces the
    # two ``(\s|\n)*`` substitutions, whose overlapping alternation can
    # backtrack heavily on long runs of newlines.
    return re.sub(r'^\s+|\s+$', '', str)
# Start actually doing something
status = check_status(status_page)
# From here on ``log`` is the open log file, no longer the log page title.
log = codecs.open(directory+'deliverybot-log.txt', 'a', 'utf-8')
# Any other status text means there is nothing to do on this run.
if status in ('start', 'run', 'really start', 'restart'):
    log_in()
    # The function names are rebound to their results on purpose:
    # edit_talk_page() reads ``excluded_users`` as a module-level list.
    top_user = top_user(wiki, spam)
    trusted_users = trusted_users(wiki, access_list)
    excluded_users = excluded_users(wiki, username)
    old_keys = read_keys()
    configuration = retrieve_config(spam)
    current_key = strip_cruft(configuration['key'])
    input_page = strip_cruft(configuration['recip_page'])
    subject_line = strip_cruft(configuration['subject'])
    body_text = strip_cruft(configuration['body'])
    # The trailing HTML comment is the marker edit_talk_page() searches for
    # to avoid delivering the same message twice.
    body_text_final = body_text + '\n<!-- %s %s -->' % (username, current_key)
    log.write('status is fine, let\'s edit\n')
    # Only proceed when the last editor of the configuration page is listed
    # on the access list.
    if top_user in trusted_users:
        log.write('auth is fine, let\'s edit\n')
        # A key may be reused only with 'really start' or 'restart'.
        if current_key not in old_keys or status in ('really start', 'restart'):
            log.write('key is fine, let\'s edit\n')
            if len(subject_line) > 245:
                log.write('subject line is too large, edit status page indicating so and die\n')
                change_status('Error: Subject line is too long')
                kill_self()
            else:
                if input_page != '':
                    log.write('use the page list\n')
                    change_status('Running')
                    for target in page_list_targets(wiki, input_page):
                        # Edit here!
                        edit_talk_page(target)
                    add_key(current_key)
                    change_status('Completed run successfully')
                else:
                    # Without this branch an empty recipients section left
                    # the status untouched, so every later run would log in
                    # and silently do nothing again.
                    log.write('error, no page list defined, edit status page and die\n')
                    change_status('Error: No valid target list could be found')
                    kill_self()
        else:
            log.write('key is old, edit status page to indicate such and die\n')
            change_status('Error: Key is invalid')
            kill_self()
    else:
        log.write('user not authorized to use bot; edit status page to indicate such and die\n')
        change_status('Error: User [[%s|not authorized]] to use bot' % access_list)
        kill_self()
log.close()
crontab
0,5,10,15,20,25,30,35,40,45,50,55 * * * * PYTHONPATH=$HOME/scripts python $HOME/scripts/deliverybot/deliverybot-2.py