User:ARandomName123/update bot
Appearance
# For use in Wikiproject Unreferenced articles backlog drives
# Based on a Python script written by User:Broc, adapted to pywikibot by User:ARandomName123
# Please see https://www.mediawiki.org/wiki/Manual:Pywikibot/Installation to configure/login pywikibot
import pywikibot
import requests
import re
from bs4 import BeautifulSoup
# Bot configuration: connect to English Wikipedia and pick the page
# whose leaderboard table will be refreshed (sandbox while testing).
site = pywikibot.Site('en', 'wikipedia') # The site we want to run our bot on
page = pywikibot.Page(site, 'User:ARandomName123/sandbox')  # page object the bot reads and saves
def get_n1_value(username, session):
    """Count a user's mainspace articles tagged 'feb24' for the backlog drive.

    Scrapes the sigma.toolforge.org edit-summary search for *username*,
    looking for "FEB24" edit summaries in the main namespace, and collects
    the distinct page names whose summary contains "feb24" but not
    "feb24review".

    Parameters
    ----------
    username : str
        Enwiki username to query.  NOTE(review): the name is interpolated
        into the URL unescaped — presumably callers pass plain names;
        confirm whether urllib.parse.quote is needed for exotic usernames.
    session : requests.Session
        Session used for the HTTP GET, so connections are reused across calls.

    Returns
    -------
    tuple
        (count, list of unique page names in first-seen order), or
        (None, None) if the request failed or the expected second <ul>
        was not present in the response.
    """
    # sigma tool query: summaries containing "FEB24", mainspace, up to 500 edits.
    url = (
        f'https://sigma.toolforge.org/summary.py?name={username}'
        '&search=FEB24&max=500&server=enwiki&ns=Main'
        '&enddate=20240201&startdate='
    )
    response = session.get(url)
    if response.status_code != 200:
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')
    ul_elements = soup.find_all('ul')
    # The result rows live in the second <ul> on the page.
    if len(ul_elements) < 2:
        return None, None

    # Deduplicate while preserving first-seen order.  (The original code
    # appended duplicates to the "unique" list and only deduped the count.)
    seen = set()
    unique_pagename_list = []
    for li_element in ul_elements[1].find_all('li'):
        a_elements = li_element.find_all('a')
        # Rows with fewer than 4 links are not result rows.
        if len(a_elements) < 4:
            continue
        # The 4th link points at the article; extract PAGENAME from /wiki/...
        pagename_match = re.search(r'/wiki/([^?&#]+)', a_elements[3].get('href', ''))
        if not pagename_match:
            continue
        pagename = pagename_match.group(1)
        # The edit summary is rendered in the <i> tag after the last link.
        i_tag = a_elements[-1].find_next('i')
        if i_tag is None:
            continue
        summary = i_tag.get_text().lower()
        # Count "feb24" drive tags but exclude "feb24review" re-review edits.
        if 'feb24' in summary and 'feb24review' not in summary and pagename not in seen:
            seen.add(pagename)
            unique_pagename_list.append(pagename)

    return len(unique_pagename_list), unique_pagename_list
def parse_table(text):
    """Refresh the N1 column of the first sortable wikitable in *text*.

    Scans the table for header rows containing '![[User:...', fetches each
    user's FEB24 tally via get_n1_value(), and rewrites the table cell two
    lines below the username row with the fresh count.

    Parameters
    ----------
    text : str
        Full wikitext of the page.

    Returns
    -------
    str
        The wikitext with the table's N1 values updated; returned
        unchanged if no '{| class="wikitable sortable"' table is found.
    """
    start_index = text.find('{| class="wikitable sortable"')
    if start_index == -1:
        return text

    end_index = text.find('|}', start_index)
    lines = text[start_index:end_index].split('\n')
    processed_usernames = set()  # avoid re-querying a duplicated username

    # One session for all sigma requests; the context manager closes it
    # (the original leaked the session).
    with requests.Session() as session:
        for i, line in enumerate(lines):
            if '![[User:' not in line:
                continue
            # Stop at '|' as well as ']' so piped links like
            # [[User:Foo|display]] yield "Foo" rather than "Foo|display".
            # Guarded so a malformed row can't raise AttributeError.
            username_match = re.search(r'\[\[User:([^]|]+)', line)
            if not username_match:
                continue
            username = username_match.group(1)
            if username in processed_usernames:
                continue
            n1_value, _ = get_n1_value(username, session)
            if n1_value is not None:
                # The N1 cell sits two lines below the username header row.
                n1_line_index = i + 2
                if n1_line_index < len(lines):
                    lines[n1_line_index] = f"| {n1_value}"
            processed_usernames.add(username)

    modified_table = '\n'.join(lines)
    # Splice the rewritten table back over the original span.
    return text[:start_index] + modified_table + text[end_index:]
# Fetch the current wikitext, rebuild the leaderboard table, and save.
content = page.text
newText = parse_table(content)
# parse_table returns the complete updated wikitext, so assign it directly.
# (The old `page.text.replace(content, newText)` replaced the entire page
# text with itself-modified — a needless full-string search.)
page.text = newText
# Save with an edit summary describing the bot run.
page.save('pwb leaderboard update test run')