User:Zache/esimerkki

From mediawiki.org

Pywikibot-kirjautuminen

  • /koko scripti (hakee yhden kuvan sekä Commonsista että Finnasta ja vertailee niiden pHash-summia)
  • /Esimerkki 2 (hakee Petscanilla luettelon FinnaUploadBotilla tallennetuista kuvista, lukee tiedoston nimestä Finna-id:n, hakee kuvatiedostot Finnasta ja Commonsista ja vertaa niiden pHash-tarkistussummia; jos summat vastaavat toisiaan, tallentaa Commonsin tietoihin Finna ID (P9478)- ja pHash checksum (P9310) -tiedot)

Valmistelu[edit]

# mkdir projekti
# cd projekti

# python3 -m venv venv
# source venv/bin/activate
# echo "usernames['commons']['commons'] = 'FinnaUploadBot'" > user-config.py

pip install pywikibot wikitextparser requests imagehash Pillow

Login with Pywikibot[edit]

test.py

#!/usr/bin/python
"""Log in to Wikimedia Commons with Pywikibot and report the account in use."""
import pywikibot

# Family "commons", code "commons": the same pair that user-config.py
# registers a username for.
site = pywikibot.Site("commons", "commons")
site.login()
# Data repository attached to the site; kept at module level for later snippets.
repo = site.data_repository()

user = site.user()
if not user:
    pywikibot.output('Not logged in on {0}.'.format(site))
    # Raise SystemExit directly instead of calling exit(): the exit() helper is
    # installed by the ``site`` module for interactive use and is not
    # guaranteed to exist in every interpreter setup.
    raise SystemExit(1)

pywikibot.output('Logged in on {0} as {1}.'.format(site, user))

Running test.py

# python test.py

First edit[edit]

# Make a test edit to the Commons sandbox.
# Only the ``pywikibot`` module is imported above, so the Page class has to be
# reached through it; the title is spelled out here because no page title has
# been defined yet at this point of the script.
# NOTE(review): save() replaces the whole page text with the string below --
# confirm that this is the intended sandbox content.
newPage = pywikibot.Page(site, "Commons:Sandbox")
newPage.text = "Commons:Sandbox"
newPage.save("test edit")

Read SDC properties[edit]

# Read SDC mediainfo
def getCurrentMediaInfo(site, mediaid):
    """Return the entity dict for *mediaid* as reported by ``wbgetentities``.

    Args:
        site: Pywikibot site object; its ``_simple_request`` method is used to
            build the API request.
        mediaid: Entity id to look up (the callers pass "M" + page id).

    Returns:
        The entity dict from the ``entities`` section of the response, or an
        empty dict when the response has no ``entities`` section, does not
        contain *mediaid*, or the entity has no truthy ``pageid``.
    """
    request = site._simple_request(action='wbgetentities', ids=mediaid)
    data = request.submit()

    # Tolerate a response without "entities" or without the requested id
    # instead of failing with AttributeError on None.
    entity = (data.get('entities') or {}).get(mediaid) or {}

    # An entity without a page id is treated as "nothing to read".
    return entity if entity.get('pageid') else {}

# static pagename
pageTitle = "File:Tallinnan_asemakaava,_kilpailuehdotus,_nimimerkki_Bebaungsplan_fur_Reval,_Viro,_ajoittamaton_(mfa.153f3f6e-dcd2-4a0f-be1f-bddc580368be).tif"
# Only the ``pywikibot`` module is imported, so Page is reached through it.
page = pywikibot.Page(site, pageTitle)
# The entity id passed to wbgetentities is the page id prefixed with "M".
item_id = 'M' + str(page.pageid)

item = getCurrentMediaInfo(site, item_id)


def _last_statement_value(entity, prop, default=''):
    """Return the value of the last *prop* statement in *entity*.

    Args:
        entity: Entity dict as returned by getCurrentMediaInfo().
        prop: Property id such as 'P9478'.
        default: Value returned when no statement carries a value.

    Returns:
        The ``mainsnak.datavalue.value`` of the last statement that has a
        datavalue, or *default* if there is none.
    """
    value = default
    # "statements" may be missing or empty; both cases mean "no statements".
    for statement in (entity.get('statements') or {}).get(prop) or []:
        datavalue = (statement.get('mainsnak') or {}).get('datavalue')
        # Skip snaks that carry no datavalue instead of crashing on None.
        if datavalue is not None:
            value = datavalue.get('value')
    return value


# P9478 = Finna ID, P9310 = pHash checksum
finna_id = _last_statement_value(item, 'P9478')
phash = _last_statement_value(item, 'P9310')

print("finna_id: ", finna_id)
print("phash: ", phash)

Read Finna info and download image[edit]

# Get image info from Finna
import json
import os
import urllib
import urllib.parse
import urllib.request

import requests

def downloadFile(url, local_filename="tmp/file_to_commons.tif", timeout=60):
    """Download *url* to *local_filename* and return the local path.

    Args:
        url: Address of the file to fetch.
        local_filename: Destination path. Every call with the default value
            writes to the same file, so a later download replaces an earlier
            one.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the script forever.

    Returns:
        *local_filename*.

    Raises:
        requests.HTTPError: If the server answers with an error status; the
            destination file is not written in that case.
    """
    # Create the destination directory on first use.
    target_dir = os.path.dirname(local_filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # stream=True lets iter_content() fetch the body chunk by chunk instead of
    # loading the whole image into memory first; the context managers close
    # both the connection and the file even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=512 * 1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename

# Look the record up in the Finna API. urlencode() escapes the id so that
# characters with a special meaning in URLs cannot corrupt the query string.
url = "https://api.finna.fi/v1/record?" + urllib.parse.urlencode(
    {"lng": "fi", "prettyPrint": 1, "id": finna_id}
)

with urllib.request.urlopen(url) as file:
    data = json.loads(file.read().decode())

# Download every image listed for the record. downloadFile() writes to one
# fixed path by default, so after the loop local_file holds the last image.
local_file = None
if data.get("status") == "OK":
    # "records" and "images" can be absent; treat that as an empty list.
    for record in data.get("records") or []:
        for image in record.get("images") or []:
            image_url = "https://www.finna.fi" + image
            print("Downloading file " + image_url + "\n")
            local_file = downloadFile(image_url)

# Stop with a clear message instead of failing later on a missing file.
if local_file is None:
    raise SystemExit("No image could be downloaded for Finna id " + repr(finna_id))

Calculate phash[edit]

# Phash 
import imagehash
from PIL import Image

def read_phash_and_imageinfo(file_name):
    """Compute the perceptual hash and basic image info of an image file.

    Args:
        file_name: Path of the image file to open.

    Returns:
        A dict with the keys ``phash`` (the hash converted to a string),
        ``image_width``, ``image_height`` and ``imagehash_version``
        ("Imagehash " followed by the library version).
    """
    # Open the file named by the argument (not a module-level variable) and
    # close it again once the hash and size have been read.
    with Image.open(file_name) as im:
        width, height = im.size
        return {
            "phash": str(imagehash.phash(im)),
            "image_width": width,
            "image_height": height,
            "imagehash_version": "Imagehash " + str(imagehash.__version__),
        }

# Hash the image that was downloaded from Finna. The result is stored as
# finna_phash because the comparison at the end of the script reads it under
# that name.
finna_phash = read_phash_and_imageinfo(local_file)
print(finna_phash)

Get image info from Commons + calculate phash[edit]

# Get image info from Commons

def get_commons_image_info(filename):
    """Query the Commons API for the imageinfo of *filename*.

    Args:
        filename: Page title of the file; it is URL-quoted before being placed
            in the ``titles`` parameter.

    Returns:
        The decoded JSON response of the ``action=query`` request.
    """
    api_prefix = "https://commons.wikimedia.org/w/api.php?action=query&format=json&prop=imageinfo&iiprop=url&titles="
    request_url = api_prefix + urllib.parse.quote_plus(filename) + "&iiurlwidth=1024"

    with urllib.request.urlopen(request_url) as response:
        raw_body = response.read()
    return json.loads(raw_body.decode())

# Ask Commons for the file URL of the page and download it.
# NOTE(review): the request also sets iiurlwidth=1024, yet the "url" field is
# what gets downloaded here -- confirm whether the scaled "thumburl" was meant.
t = get_commons_image_info(pageTitle)
commons_image_url = t["query"]["pages"][str(page.pageid)]["imageinfo"][0]["url"]

local_file = downloadFile(commons_image_url)
commons_phash = read_phash_and_imageinfo(local_file)

# read_phash_and_imageinfo() stores the hash as a string, and strings cannot
# be subtracted. Convert both back to ImageHash objects; subtracting those
# gives the distance between the two hashes.
finna_hash = imagehash.hex_to_hash(finna_phash["phash"])
commons_hash = imagehash.hex_to_hash(commons_phash["phash"])

# Print commons & finna phash
print("* Finna phash: " + str(finna_phash["phash"]))
print("* Commons phash: " + str(commons_phash["phash"]))
print("* Difference: ", str(finna_hash - commons_hash))