#!/usr/bin/perl
# Toolserver:~gregbard/philosobot/phillists/redlinks/most linked not rated.pl

use strict;          # 'strict' insists that all variables be declared
use diagnostics;     # 'diagnostics' expands the cryptic warnings
undef $/;            # undefines the separator. Can read one whole file in one scalar.

# NOTE(review): $ENV{HOME} normally has no trailing slash, so this yields
# "<home>../modules" -- confirm whether '/../modules' was intended.
use lib $ENV{HOME}. '../modules'; # path to perl modules
require 'bin/perlwikipedia_utils.pl'; # provides wikipedia_login and wikipedia_submit

use open 'utf8';

MAIN: {
    # Purpose:
    #  1) create a list of the philosophy articles most linked to from other
    #     philosophy articles;
    #  2) mark which of those articles are not yet rated by quality by the
    #     philosophy wikiproject.
    # The resulting list is written to a local file and submitted to Wikipedia.

    # NOTE(review): the name of the link-count input file is missing from this
    # copy of the script (the open() call that read it is truncated), so it has
    # to be given as the first command-line argument until the original name is
    # restored here. A relative name is resolved after the chdir below.
    my $links_file = $ARGV[0];
    die "Usage: $0 LINK_COUNTS_FILE\n"
        unless defined $links_file && length $links_file;

    # Helpers providing wikipedia_login() and wikipedia_submit(). require is a
    # no-op when the file has already been loaded elsewhere.
    require 'bin/perlwikipedia_utils.pl';

    # Read a whole file into one scalar. Dies when the file cannot be read, so
    # that a missing input never results in an empty list being published.
    my $slurp = sub {
        my ($path) = @_;
        open(my $fh, '<', $path) or die "Cannot read $path: $!\n";
        local $/;    # slurp mode, whatever the global separator is set to
        my $content = <$fh>;
        close($fh);
        return defined $content ? $content : '';
    };

    # NOTE(review): $ENV{HOME} normally has no trailing slash, so this path is
    # "<home>../phillists/redlinks" -- confirm the intended directory. A failure
    # is only reported, because the script may already run in the right place.
    my $work_dir = $ENV{'HOME'}. '../phillists/redlinks';
    chdir $work_dir or warn "Cannot chdir to $work_dir: $!\n";

    # Get the lists of philosophy articles and philosophers (one title per
    # line). Put the entries in a hash for fast lookup.
    my %phil_hash;
    for my $list_file ('../All_philosophy.txt', '../All_philosophers.txt') {
        $phil_hash{$_} = 1 for split /\n/, $slurp->($list_file);
    }

    # Get the list of all articles linked from philosophy articles, with how
    # many philosophy articles link to each of them. Of all those articles keep
    # only the philosophy ones (e.g., "English language" is linked a lot from
    # philosophy articles, but we ignore it for our purposes).
    my %linked_freq;
    for my $line (split /\n/, $slurp->($links_file)) {
        # A usable line holds a [[wiki link]] and ends with a number.
        next unless $line =~ /^.*?\[\[(.*?)\]\].*?(\d+)\s*$/;
        my ($article, $freq) = ($1, $2);  # an article, and how many times it is linked to
        next unless exists $phil_hash{$article};
        $linked_freq{$article} = $freq;
    }

    # Now read the articles which were rated by quality as part of the
    # philosophy wikiproject. If the file containing the ratings is
    # compressed, uncompress it first.
    my $rated_file     = '/tmp/wp10/Philosophy_articles_by_quality_old_ids';
    my $rated_file_bz2 = $rated_file. ".bz2";
    if (-e $rated_file_bz2) {
        print `bunzip2 -fv $rated_file_bz2`. "\n";
    }
    if (! -e $rated_file) {
        print "$rated_file does not exist. Bailing out.\n";
        exit(0);
    }

    my $sep = ' ;; ';
    my %rated_hash;
    for my $line (split /\n/, $slurp->($rated_file)) {
        next if $line =~ /^\#/;              # a line starting with # is a comment to be ignored
        next unless $line =~ /^(.*?)\Q$sep\E/; # there is a separator; the title precedes it
        $rated_hash{$1} = 1;
    }

    # Create a list of all philosophy articles linked from other philosophy
    # articles, in decreasing frequency of being linked to. Mark the articles
    # which are not assessed yet. Ties are ordered by title so that the output
    # is the same from run to run.
    # NOTE(review): the row text may have lost wiki link markup around the
    # title and "talk" in this copy of the script -- confirm before deploying.
    my $text = "";
    for my $article (sort { $linked_freq{$b} <=> $linked_freq{$a} or $a cmp $b }
                     keys %linked_freq) {
        $text .= '# ' . $article . ' (talk) '. $linked_freq{$article}. ' ';
        $text .= ' not rated! ' if ! exists $rated_hash{$article};
        $text .= "\n";

        # Cut the list when we arrive at articles which are linked from a
        # number of articles at or below the current threshold. The check comes
        # after the append, so the first such article is still listed.
        last if $linked_freq{$article} <= 10;
    }
    $text = print_header() . $text; # add a note on top

    # Write to file. This is only a local copy, so a failure is reported but
    # does not prevent the submission below.
    my $outfile = 'User:Philosobot/Most_linked_phil_articles.wiki';
    if (open(my $out, '>', $outfile)) {
        print {$out} "$text\n";
        close($out) or warn "Cannot finish writing $outfile: $!\n";
    }
    else {
        warn "Cannot write $outfile: $!\n";
    }

    # Also submit to Wikipedia.
    my $Editor   = wikipedia_login();
    my $attempts = 10;
    my $sleep    = 5;
    wikipedia_submit($Editor, $outfile, "Update", $text, $attempts, $sleep);
}

sub print_header {
    # Build the explanatory note placed on top of the generated list. Takes no
    # arguments and returns the note as one string in which every paragraph is
    # followed by an empty line.
    my @paragraphs = (
        q{This is a list of philosophy articles which are most linked to from other philosophy articles (that's the number on the right).},
        q{The point of this list is that the more linked to an article is, the more important it probably is, and the more crucial is for it to be in good shape.},
        q{Also, articles which are not yet in Version 1.0 Editorial Team/Philosophy articles by quality are pointed out.},
    );

    return join q{}, map { $_ . "\n\n" } @paragraphs;
}