User:Zache/esimerkki

Pywikibot-kirjautuminen
 * /koko scripti (hakee yhden kuvan sekä Commonsista että Finnasta ja vertailee niiden Phash-summaa)
 * /Esimerkki 2 (hakee Petscan:lla luettelon FinnaUploadBot:lla tallennetuista kuvista, lukee tiedoston nimestä Finna_id:n, hakee Finnasta ja Commonsista kuvatiedostot ja vertaa niiden Phash-tarkistussummaa; jos summat vastaavat, tallentaa Commonsin tietoihin Finna_id- ja Phash-tiedot)

Valmistelu

 * 1) mkdir projekti
 * 2) cd projekti


 * 1) python3 -m venv venv
 * 2) source venv/bin/activate
 * 3) echo "usernames['commons']['commons'] = 'FinnaUploadBot'" > user-config.py

pip install pywikibot wikitextparser requests imagehash pillow

Login with Pywikibot
#!/usr/bin/python
# test.py: log in to Wikimedia Commons with Pywikibot and report the result.
import pywikibot

# The account name comes from user-config.py (usernames['commons']['commons']).
site = pywikibot.Site("commons", "commons")
site.login()
repo = site.data_repository()

# A falsy user name means the login did not succeed.
user = site.user()
if user:
    pywikibot.output('Logged in on {0} as {1}.'.format(site, user))
else:
    pywikibot.output('Not logged in on {0}.'.format(site))
    # Stop with a non-zero exit status so later steps do not run anonymously.
    raise SystemExit(1)

Running test.py
 * 1) python test.py

First edit
# First edit: replace the text of the page named by pageTitle and save it.
# NOTE(review): pageTitle has to be defined before this snippet runs, and the
# text "Commons:Sandbox" looks like it was meant to be the page title --
# confirm the intended target before running this against a real page.
newPage = pywikibot.Page(site, pageTitle)
newPage.text = "Commons:Sandbox"
newPage.save("test edit")

Read SDC properties
# Read SDC mediainfo
def getCurrentMediaInfo(site, mediaid):
    """Return the entity data for *mediaid*, or ``{}`` when there is none.

    Args:
        site: Object exposing ``_simple_request(**params)``; the returned
            request must provide ``submit()`` yielding the decoded response.
        mediaid: Entity id to look up, e.g. ``"M12345"``.

    Returns:
        The dict stored under ``entities[mediaid]`` in the ``wbgetentities``
        response when it carries a truthy ``pageid``; otherwise ``{}``.
    """
    request = site._simple_request(action='wbgetentities', ids=mediaid)
    data = request.submit()
    # Tolerate a response without "entities" or without this id instead of
    # failing with AttributeError on a chained .get().
    entity = (data.get('entities') or {}).get(mediaid) or {}
    if entity.get('pageid'):
        return entity
    return {}

# Static page name used by the rest of the example.
pageTitle = "File:Tallinnan_asemakaava,_kilpailuehdotus,_nimimerkki_Bebaungsplan_fur_Reval,_Viro,_ajoittamaton_(mfa.153f3f6e-dcd2-4a0f-be1f-bddc580368be).tif"
page = pywikibot.Page(site, pageTitle)
# The entity id requested below is "M" followed by the file page's page id.
item_id = 'M' + str(page.pageid)

item = getCurrentMediaInfo(site, item_id)
# "statements" can be missing or empty when the file has no SDC data.
statements = item.get('statements') or {}


def _last_statement_value(prop):
    """Return the value of the last *prop* statement that has one, else ''."""
    value = ''
    for statement in statements.get(prop) or []:
        datavalue = (statement.get('mainsnak') or {}).get('datavalue')
        # Skip statements whose main snak carries no datavalue rather than
        # crashing on None.
        if datavalue is not None:
            value = datavalue.get('value')
    return value


finna_id = _last_statement_value('P9478')  # P9478: Finna id
phash = _last_statement_value('P9310')  # P9310: phash

print("finna_id: ", finna_id)
print("phash: ", phash)

Read Finna info and download image
# Get image info from Finna
import json
import os
import urllib
import urllib.parse
import urllib.request

import requests

def downloadFile(url, local_filename="tmp/file_to_commons.tif", timeout=60):
    """Download *url* to *local_filename* and return the local path.

    Args:
        url: Address of the file to fetch.
        local_filename: Destination path. The default is the fixed path this
            example has always written to, so repeated calls overwrite it.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        ``local_filename``.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never stored as if it were the image.
    """
    # Create the target directory on first use.
    directory = os.path.dirname(local_filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # stream=True lets iter_content() write chunk by chunk instead of holding
    # the whole file in memory; both context managers guarantee cleanup.
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=512 * 1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename

# Ask the Finna API for the record; urlencode() escapes characters in the id
# that would otherwise break the query string.
url = "https://api.finna.fi/v1/record?" + urllib.parse.urlencode(
    {"lng": "fi", "prettyPrint": 1, "id": finna_id}
)

with urllib.request.urlopen(url) as file:
    data = json.loads(file.read().decode())

local_file = None
if data.get("status") == "OK":
    for record in data.get("records") or []:
        for image in record.get("images") or []:
            # Image entries are joined onto the www.finna.fi host.
            image_url = "https://www.finna.fi" + image
            print("Downloading file " + image_url + "\n")
            # Every image is written to the same path, so local_file ends up
            # referring to the last image downloaded.
            local_file = downloadFile(image_url)

if local_file is None:
    # Fail here with a clear message instead of a NameError further down.
    raise SystemExit("No image could be downloaded from Finna for id: " + str(finna_id))

Calculate phash
# Phash
import imagehash
from PIL import Image


def read_phash_and_imageinfo(file_name):
    """Return the perceptual hash and basic image info for *file_name*.

    Args:
        file_name: Path of the image file to open.

    Returns:
        A dict with the keys ``"phash"`` (``str(imagehash.phash(...))``),
        ``"image_width"``, ``"image_height"`` and ``"imagehash_version"``.
    """
    ret = {}
    # Open the file that was passed in (not the module-level local_file) and
    # close the handle as soon as the values have been read.
    with Image.open(file_name) as im:
        # calculate phash
        ret["phash"] = str(imagehash.phash(im))
        ret["image_width"], ret["image_height"] = im.size
    ret["imagehash_version"] = "Imagehash " + str(imagehash.__version__)
    return ret

# read_phash_and_imageinfo
# Store the result as finna_phash: that is the name the final comparison
# reads, and it keeps the SDC value held in `phash` from being overwritten.
finna_phash = read_phash_and_imageinfo(local_file)
print(finna_phash)

Get image info from Commons + calculate phash

 * 1) Get image info from Commons

def get_commons_image_info(filename):
    """Query the Commons API for the imageinfo of *filename*.

    Args:
        filename: Page title of the file, e.g. ``"File:Example.tif"``.

    Returns:
        The decoded JSON response of the ``action=query&prop=imageinfo``
        request as a dict.
    """
    # urlencode() applies quote_plus() to every value, so the title is
    # escaped exactly as before and the parameter order is kept.
    url = "https://commons.wikimedia.org/w/api.php?" + urllib.parse.urlencode(
        {
            "action": "query",
            "format": "json",
            "prop": "imageinfo",
            "iiprop": "url",
            "titles": filename,
            "iiurlwidth": 1024,
        }
    )

    with urllib.request.urlopen(url) as file:
        data = json.loads(file.read().decode())
    return data

# Look up the file's URL on Commons; the response is keyed by page id.
t = get_commons_image_info(pageTitle)
commons_image_url = t.get("query").get("pages").get(str(page.pageid)).get("imageinfo")[0].get("url")

local_file = downloadFile(commons_image_url)
commons_phash = read_phash_and_imageinfo(local_file)

# Print commons & finna phash
print("* Finna phash: " + str(finna_phash["phash"]))
print("* Commons phash: " + str(commons_phash["phash"]))
# The hashes are stored as hex strings, which cannot be subtracted. Convert
# them back to ImageHash objects; their difference is the number of bits
# that differ between the two hashes.
difference = imagehash.hex_to_hash(finna_phash["phash"]) - imagehash.hex_to_hash(commons_phash["phash"])
print("* Difference: ", str(difference))