Toolserver:Query service/1

From mediawiki.org

This page was moved from the Toolserver wiki.
Toolserver has been replaced by Toolforge. As such, the instructions here may no longer work, but may still be of historical interest.
Please help by updating examples, links, template links, etc. If a page is still relevant, move it to a normal title and leave a redirect.

Begriffsklärungsseiten[edit]

Description Top linked disambiguation pages on the and the .
Assignee Bryan
JIRA bug DBQ-1
Source dewiki_dab.sh / nlwiki_dab.sh
Results /
Approximate run time 40 minutes
Interval weekly
Category:Query service


Code[edit]

/usr/bin/mysql -hsql-s2 --skip-column-names u_bryan >/home/bryan/public_html/stats/dbquery/dewiki_dab.txt <<EOF
CREATE TABLE IF NOT EXISTS dewiki_disambiguations (page_title VARBINARY(255), page_id INT, page_latest INT, rd_title VARBINARY(255), date DATE, PRIMARY KEY(page_title), INDEX(rd_title), INDEX (date));
CREATE TABLE IF NOT EXISTS dewiki_dablinks (page_title VARBINARY(255), linkcount INT, date DATE, PRIMARY KEY(page_title), INDEX(date));

DELETE FROM dewiki_disambiguations WHERE date = CURDATE();

-- All pages in the main namespace that are in the category "Begriffsklärung"
INSERT INTO dewiki_disambiguations SELECT page_title, page_id, page_latest, NULL as rd_title, CURDATE() AS date FROM dewiki_p.page, dewiki_p.categorylinks WHERE page_namespace = 0 AND page_id = cl_from AND cl_to = "Begriffsklärung";

-- All pages in the main namespace that redirect to a disambiguation page
REPLACE INTO dewiki_disambiguations SELECT p.page_title, p.page_id, p.page_latest, r.rd_title, CURDATE() AS date FROM dewiki_p.page AS p, dewiki_p.redirect AS r, dewiki_disambiguations AS d WHERE p.page_namespace = 0 AND p.page_id = r.rd_from AND r.rd_namespace = 0 AND r.rd_title = d.page_title AND d.date = CURDATE() AND d.rd_title IS NULL;

-- Links to dismabiguation pages
DELETE FROM dewiki_dablinks WHERE date = CURDATE();

-- Links to disambiguations where the link source is in the main namespace
INSERT INTO dewiki_dablinks SELECT d.page_title, COUNT(p.page_id) AS linkcount, CURDATE() as date FROM dewiki_p.page AS p, dewiki_p.pagelinks, dewiki_disambiguations AS d WHERE p.page_namespace = 0 AND p.page_id = pl_from AND (pl_namespace, pl_title) = (0, d.page_title) AND d.date = CURDATE() GROUP BY d.page_title;

-- Get the diffs in linkcount
CREATE TEMPORARY TABLE dewiki_dablinks_diff (page_title VARBINARY(255), cur_linkcount INT, prev_linkcount INT, PRIMARY KEY(page_title));
INSERT INTO dewiki_dablinks_diff (page_title, cur_linkcount, prev_linkcount) SELECT page_title, linkcount, linkcount FROM dewiki_dablinks WHERE date = CURDATE() AND linkcount > 100 ORDER BY linkcount DESC;
SELECT DISTINCT @prev_date := date FROM dewiki_dablinks WHERE date < CURDATE() ORDER BY date DESC LIMIT 1;
UPDATE dewiki_dablinks_diff, dewiki_dablinks SET prev_linkcount = linkcount WHERE dewiki_dablinks_diff.page_title = dewiki_dablinks.page_title AND date = @prev_date;

-- Output in wiki format
SELECT CONCAT('# [[', page_title, ']]: ', cur_linkcount, ' (', (cur_linkcount - prev_linkcount), ') [[Special:Whatlinkshere/', page_title, '|Links]]') FROM dewiki_dablinks_diff ORDER BY cur_linkcount DESC;
FROM dewiki_dablinks_diff ORDER BY cur_linkcount DESC;
EOF

/home/bryan/local/bin/python update.py -w de:wikipedia -p Wikipedia:WikiProjekt_Begriffsklärungsseiten/Top-BKS -t list -s "Updating disambiguations" `mysql -hsql-s2 --skip-column-names -e "SELECT CONCAT('count:', COUNT(*)) FROM dewiki_disambiguations WHERE date = CURDATE(); SELECT CONCAT('linkcount:', SUM(linkcount)) FROM dewiki_dablinks WHERE date = CURDATE(); SELECT CONCAT('today:', CURDATE()); SELECT CONCAT('last:', date) FROM dewiki_dablinks WHERE date < CURDATE() ORDER BY date DESC LIMIT 1;" u_bryan` </home/bryan/public_html/stats/dbquery/dewiki_dab.txt