User:Zache/esimerkki/koko scripti

From mediawiki.org
#!/usr/bin/python
import pywikibot

# MAIN()
# Connect to Wikimedia Commons and make sure the session is authenticated
# before anything else is attempted.
site = pywikibot.Site("commons", "commons")
site.login()
repo = site.data_repository()

user = site.user()
if user:
    pywikibot.output('Logged in on {0} as {1}.'.format(site, user))
else:
    pywikibot.output('Not logged in on {0}.'.format(site))
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive sessions and is not meant for programs.
    raise SystemExit(1)

# Commons file whose structured data is inspected by the rest of the script.
pageTitle = "File:Tallinnan asemakaava, kilpailuehdotus, nimimerkki Bebaungsplan fur Reval, Viro, ajoittamaton (mfa.153f3f6e-dcd2-4a0f-be1f-bddc580368be).tif"
page = pywikibot.Page(site, pageTitle)
# Disabled example of a test edit (kept for reference):
#newPage.text="Commons:Sandbox"
#newPage.save("test edit")

# Entity id used for the wbgetentities lookup: "M" followed by the page id.
item_id = 'M' + str(page.pageid)

# Read SDC mediainfo
def getCurrentMediaInfo(site, mediaid):
    """Fetch the entity for *mediaid* with the ``wbgetentities`` API action.

    Args:
        site: Object providing ``_simple_request(**params)``; the request it
            returns must offer ``submit()`` returning the response dict.
        mediaid: Entity id to look up (the script passes ``'M<pageid>'``).

    Returns:
        The entity dict from the response when it carries a truthy
        ``pageid``; otherwise an empty dict. An empty dict is also returned
        when the response has no ``entities`` section or no entry for
        *mediaid*.
    """
    request = site._simple_request(action='wbgetentities', ids=mediaid)
    data = request.submit()
    # Tolerate missing/None levels instead of failing with AttributeError
    # on a chained .get().
    entity = (data.get('entities') or {}).get(mediaid) or {}
    if entity.get('pageid'):
        return entity
    return {}

def _last_statement_value(entity, property_id):
    """Return the main-snak value of the last *property_id* statement.

    Args:
        entity: Entity dict as returned by ``getCurrentMediaInfo`` (may be
            empty).
        property_id: Property key to read under ``statements``, e.g.
            ``'P9478'``.

    Returns:
        The ``value`` of the last statement that has a ``datavalue``, or
        ``''`` when the property is absent or no statement has one.
    """
    value = ''
    statements = (entity.get('statements') or {}).get(property_id) or []
    for statement in statements:
        datavalue = (statement.get('mainsnak') or {}).get('datavalue')
        if datavalue is None:
            # A snak without a datavalue has nothing to read; skip it
            # rather than crash on None.get(...).
            continue
        value = datavalue.get('value')
    return value


item = getCurrentMediaInfo(site, item_id)

# P9478 is read as the Finna id and P9310 as the stored perceptual hash.
finna_id = _last_statement_value(item, 'P9478')
phash = _last_statement_value(item, 'P9310')

print("finna_id: ", finna_id)
print("phash: ", phash)

# Phash 
import imagehash
from PIL import Image

def downloadFile(url, local_filename="tmp/file_to_commons.tif"):
    """Download *url* to *local_filename* and return the local path.

    Args:
        url: Address of the file to fetch.
        local_filename: Destination path. Defaults to the fixed temporary
            file the script has always used, so each download overwrites
            the previous one.

    Returns:
        The path the content was written to (``local_filename``).

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            an error page is never saved as if it were the image.
    """
    import os

    # Make sure the target directory exists; open() would otherwise fail.
    directory = os.path.dirname(local_filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # stream=True lets iter_content() really fetch the body chunk by chunk;
    # the context managers release the connection and the file handle even
    # when an error occurs midway.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=512 * 1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename

def read_phash_and_imageinfo(file_name):
    """Compute the perceptual hash and dimensions of an image file.

    Args:
        file_name: Path of the image to open.

    Returns:
        A dict with the keys ``phash`` (string form of
        ``imagehash.phash``), ``image_width``, ``image_height`` and
        ``imagehash_version`` (``"Imagehash "`` plus the library version).
    """
    # Open the file passed in (not the module-level ``local_file``) and
    # close it again once the hash and size have been read.
    with Image.open(file_name) as im:
        ret = {"phash": str(imagehash.phash(im))}
        ret["image_width"], ret["image_height"] = im.size

    ret["imagehash_version"] = "Imagehash " + str(imagehash.__version__)
    return ret


# Get image info from Finna
import urllib
import json
import requests 

# Import the submodules explicitly: a bare ``import urllib`` does not
# guarantee that ``urllib.parse`` / ``urllib.request`` are loaded.
import urllib.parse
import urllib.request

# Stays None when no Finna image could be downloaded, so the final print
# of the script does not fail on an undefined name.
finna_phash = None

if finna_id:
    # Percent-encode the id so that reserved characters cannot break the
    # query string.
    url = ("https://api.finna.fi/v1/record?lng=fi&prettyPrint=1&id="
           + urllib.parse.quote(str(finna_id), safe=""))

    with urllib.request.urlopen(url) as file:
        data = json.loads(file.read().decode())

    if data.get("status") == "OK":
        for record in data.get("records") or []:
            for image in record.get("images") or []:
                image_url = "https://www.finna.fi" + image
                print("Downloading file " + image_url +"\n")
                local_file = downloadFile(image_url)

                # Every image is written to the same temporary file, so
                # the result of the last image is the one that is kept.
                finna_phash = read_phash_and_imageinfo(local_file)
else:
    print("No Finna id found in the statements; skipping the Finna download.")



# Get image info from Commons

def get_commons_image_info(filename):
    """Query the Commons API for the ``imageinfo`` of *filename*.

    Args:
        filename: Page title of the file, e.g. ``"File:Example.tif"``.

    Returns:
        The decoded JSON response of the ``action=query`` /
        ``prop=imageinfo`` request (``iiprop=url``, ``iiurlwidth=1024``).
    """
    # Import the submodules explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` / ``urllib.request`` are loaded.
    import urllib.parse
    import urllib.request

    # urlencode() applies quote_plus() to every value, which yields the same
    # URL as the hand-assembled string did.
    query = urllib.parse.urlencode([
        ("action", "query"),
        ("format", "json"),
        ("prop", "imageinfo"),
        ("iiprop", "url"),
        ("titles", filename),
        ("iiurlwidth", 1024),
    ])
    url = "https://commons.wikimedia.org/w/api.php?" + query

    with urllib.request.urlopen(url) as file:
        return json.loads(file.read().decode())

t = get_commons_image_info(pageTitle)

# Walk the response defensively: when the page or its imageinfo is missing,
# stop with a clear message instead of an AttributeError/TypeError raised
# somewhere in a chained lookup.
commons_pages = (t.get("query") or {}).get("pages") or {}
commons_page_info = commons_pages.get(str(page.pageid)) or {}
commons_imageinfo = commons_page_info.get("imageinfo") or []
if not commons_imageinfo:
    raise SystemExit("No imageinfo returned by Commons for " + pageTitle)
commons_image_url = commons_imageinfo[0].get("url")

# Download the Commons file and compute the same data as for Finna.
local_file = downloadFile(commons_image_url)
commons_phash = read_phash_and_imageinfo(local_file)

# Show both results for manual comparison.
print(finna_phash)
print(commons_phash)