User:Zache/esimerkki
Pywikibot-kirjautuminen
- /koko scripti (hakee yhden kuvan sekä Commonsista että Finnasta ja vertailee niiden Phash-summaa)
- /Esimerkki 2 (hakee Petscan:lla luettelon FinnaUploadBot:lla tallennetuista kuvista, lukee tiedoston nimestä Finna_id:n, hakee Finnasta ja Commonsista kuvatiedostot ja vertaa niiden Phash-tarkistussummaa. Jos summat vastaavat, tallentaa Commonsin tietoihin Finna ID (P9478) ja pHash checksum (P9310) -tiedot.)
Valmistelu[edit]
# mkdir projekti
# cd projekti
# mkdir tmp
# python3 -m venv venv
# source venv/bin/activate
# echo "usernames['commons']['commons'] = 'FinnaUploadBot'" > user-config.py
# pip install pywikibot wikitextparser imagehash
Login with Pywikibot[edit]
test.py
#!/usr/bin/python
# Log in to Wikimedia Commons with Pywikibot and report the outcome.
import pywikibot

# Site object for Commons; the account comes from user-config.py.
site = pywikibot.Site("commons", "commons")
site.login()

# Data repository handle belonging to the site.
repo = site.data_repository()

# site.user() is falsy when no account is logged in.
user = site.user()
if not user:
    pywikibot.output('Not logged in on {0}.'.format(site))
    exit(1)

pywikibot.output('Logged in on {0} as {1}.'.format(site, user))
Running test.py
# python test.py
First edit[edit]
# Replace the wikitext of the page named by pageTitle and save it.
# NOTE(review): pageTitle must already be defined when this runs; for a test
# edit it should presumably name a sandbox page -- confirm before running.
# Page is reached through the pywikibot module because only
# `import pywikibot` is in scope.
newPage = pywikibot.Page(site, pageTitle)
newPage.text = "Commons:Sandbox"
newPage.save("test edit")
Read SDC properties[edit]
# Read SDC mediainfo
def getCurrentMediaInfo(site, mediaid):
    """Fetch one entity through the ``wbgetentities`` API action.

    Args:
        site: Object providing ``_simple_request`` (the logged-in site).
        mediaid: Entity id to request; the caller builds it as ``'M'`` plus
            the page id.

    Returns:
        The entity dict from the response when it contains a truthy
        ``pageid``; otherwise an empty dict.
    """
    request = site._simple_request(action='wbgetentities', ids=mediaid)
    data = request.submit()
    # Tolerate a response without 'entities' or without this id instead of
    # failing with AttributeError on None.
    entity = (data.get('entities') or {}).get(mediaid) or {}
    if entity.get('pageid'):
        return entity
    return {}


def _last_statement_value(item, prop):
    """Return the value of the last *prop* statement that has a datavalue.

    Returns '' when the entity has no such statement. Statements whose
    mainsnak carries no 'datavalue' are skipped rather than raising.
    """
    value = ''
    statements = (item.get('statements') or {}).get(prop) or []
    for statement in statements:
        datavalue = (statement.get('mainsnak') or {}).get('datavalue')
        if datavalue is not None:
            value = datavalue.get('value')
    return value


# static pagename
pageTitle = "File:Tallinnan_asemakaava,_kilpailuehdotus,_nimimerkki_Bebaungsplan_fur_Reval,_Viro,_ajoittamaton_(mfa.153f3f6e-dcd2-4a0f-be1f-bddc580368be).tif"
# Page is reached through the pywikibot module because only
# `import pywikibot` is in scope.
page = pywikibot.Page(site, pageTitle)

# The entity id is the page id prefixed with 'M'.
item_id = 'M' + str(page.pageid)
item = getCurrentMediaInfo(site, item_id)

# P9478 and P9310 are the property ids read for finna_id and phash.
finna_id = _last_statement_value(item, 'P9478')
phash = _last_statement_value(item, 'P9310')

print("finna_id: ", finna_id)
print("phash: ", phash)
Read Finna info and download image[edit]
# Get image info from Finna
import json
import os
import urllib.parse
import urllib.request

import requests


def downloadFile(url):
    """Download *url* to ``tmp/file_to_commons.tif`` and return that path.

    The target file is overwritten on every call.

    Args:
        url: Address of the file to fetch.

    Returns:
        The local file name the content was written to.

    Raises:
        requests.HTTPError: The server answered with an error status.
    """
    local_filename = "tmp/file_to_commons.tif"
    # Make sure the target directory exists before opening the file.
    os.makedirs(os.path.dirname(local_filename), exist_ok=True)
    # stream=True lets iter_content() write chunk by chunk instead of
    # buffering the whole body first; the with-blocks close the connection
    # and the file even when a write fails.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail here rather than saving an error page as the image.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=512 * 1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename


# The record id is URL-encoded so reserved characters survive the query string.
url = ("https://api.finna.fi/v1/record?lng=fi&prettyPrint=1&id="
       + urllib.parse.quote_plus(str(finna_id)))
with urllib.request.urlopen(url) as file:
    data = json.loads(file.read().decode())

if data.get("status") == "OK":
    # A missing 'records' or 'images' entry is treated as an empty list.
    for record in data.get("records") or []:
        for image in record.get("images") or []:
            image_url = "https://www.finna.fi" + image
            print("Downloading file " + image_url + "\n")
            # Every download lands in the same file, so local_file ends up
            # holding the last image listed.
            local_file = downloadFile(image_url)
Calculate phash[edit]
# Phash
import imagehash
from PIL import Image


def read_phash_and_imageinfo(file_name):
    """Compute the perceptual hash and pixel size of an image file.

    Args:
        file_name: Path of the image to open.

    Returns:
        A dict with the keys ``phash`` (string form of the hash),
        ``image_width``, ``image_height`` and ``imagehash_version``.
    """
    ret = {}
    # Open the file passed in by the caller, not a global, and close the
    # image handle once the values have been read.
    with Image.open(file_name) as im:
        # calculate phash
        ret["phash"] = str(imagehash.phash(im))
        ret["image_width"], ret["image_height"] = im.size
    ret["imagehash_version"] = "Imagehash " + str(imagehash.__version__)
    return ret


# Stored as finna_phash: local_file holds the Finna download at this point,
# and a separate name keeps the earlier phash variable intact.
finna_phash = read_phash_and_imageinfo(local_file)
print(finna_phash)
Get image info from Commons + calculate phash[edit]
# Get image info from Commons
import json
import urllib.parse
import urllib.request


def get_commons_image_info(filename):
    """Query the Commons API for the imageinfo of *filename*.

    Args:
        filename: Page title of the file.

    Returns:
        The decoded JSON response as a dict.
    """
    url = ("https://commons.wikimedia.org/w/api.php?action=query&format=json"
           "&prop=imageinfo&iiprop=url&titles="
           + urllib.parse.quote_plus(filename))
    # NOTE(review): a width is requested, but the code below reads the "url"
    # field only -- confirm whether a scaled image was intended.
    url += "&iiurlwidth=1024"
    with urllib.request.urlopen(url) as file:
        data = json.loads(file.read().decode())
    return data


t = get_commons_image_info(pageTitle)
# Pages in the response are keyed by page id as a string.
commons_image_url = t.get("query").get("pages").get(str(page.pageid)).get("imageinfo")[0].get("url")
local_file = downloadFile(commons_image_url)
commons_phash = read_phash_and_imageinfo(local_file)

# Print commons & finna phash
print("* Finna phash: " + str(finna_phash["phash"]))
print("* Commons phash" + str(commons_phash["phash"]))
# The stored hashes are strings, which cannot be subtracted. Convert them
# back to hash objects first; their difference is the distance between them.
difference = (imagehash.hex_to_hash(finna_phash["phash"])
              - imagehash.hex_to_hash(commons_phash["phash"]))
print("* Difference: ", str(difference))