getusers.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. #!/usr/bin/env python3
  2. """
  3. Script to get data from motomirko and geocode it.
  4. """
import json
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
# When true, debug() prints its diagnostic messages.
DEBUG = True
# Base URL that discourse_api_url() prefixes to every Discourse request path.
API_URL = "https://motomirko.pl/"
# Credentials appended to each Discourse request as api_key / api_username.
# NOTE(review): "ciach" looks like a redacted placeholder - confirm and
# substitute a real key before running.
API_KEY = "ciach"
API_USER = "mirkomapa"
# Seconds passed to time.sleep() after each user is processed in get_data().
API_SLEEP = 1
  14. def debug(text):
  15. """Debug messages"""
  16. if DEBUG:
  17. print(text)
  18. def discourse_api_url(req):
  19. """Prepare discourse api call url"""
  20. return "{0}{1}&api_key={2}&api_username={3}".format(API_URL, req, API_KEY, API_USER)
  21. def discourse_api(url):
  22. """Make API call to discourse"""
  23. try:
  24. debug("In dicourse_api: {0}".format(url))
  25. response = urllib.request.urlopen(url)
  26. except urllib.error.HTTPError as e:
  27. print("Oppss, HTTP returned: {0} with: {1}".format(e.code, e.reason))
  28. print("Check API settings")
  29. exit(1)
  30. except urllib.error.URLError as e:
  31. print("Oppss, URL error: {0}".format(e.reason))
  32. exit(2)
  33. return response
  34. def get_users():
  35. """Get list of all users"""
  36. next_page = "directory_items.json?period=all&order=post_count"
  37. total_users = 1
  38. non_active = 0
  39. users = list()
  40. while len(users) < total_users - non_active:
  41. url = discourse_api_url(next_page.replace("directory_items?", "directory_items.json?"))
  42. debug("In get_users: {0}".format(url))
  43. response = discourse_api(url).read().decode('utf-8')
  44. response = json.loads(response)
  45. total_users = response["total_rows_directory_items"]
  46. next_page = response["load_more_directory_items"]
  47. debug("In get_users, next page: {0}".format(next_page))
  48. for user in response["directory_items"]:
  49. count = user["likes_received"] + user["likes_given"] + user["topics_entered"] +\
  50. user["topic_count"] + user["post_count"] + user["posts_read"] +\
  51. user["days_visited"]
  52. if count > 0:
  53. users.append(user["user"]["username"])
  54. else:
  55. non_active = non_active + 1
  56. debug(json.dumps(user, indent=2))
  57. return users
  58. def get_latlon(location):
  59. """Geocode location"""
  60. url = "https://nominatim.openstreetmap.org/search/?q={0}&format=json&limit=1".format(
  61. urllib.parse.quote(location))
  62. debug("In get_latlon: {0}".format(url))
  63. try:
  64. response = urllib.request.urlopen(url)
  65. except urllib.error.HTTPError as e:
  66. print("Geocoding HTTP error {0} with: {1}".format(e.code, e.reason))
  67. exit(3)
  68. except urllib.error.URLError as e:
  69. print("Geocoding URL error: {0}".format(e.reason))
  70. exit(4)
  71. response = json.loads(response.read().decode('utf-8'))
  72. return response
  73. def get_data(users):
  74. """Get data about users, omit users without location"""
  75. users_data = list()
  76. for user in users:
  77. debug("Processing in get_data: {0}".format(user))
  78. data = dict()
  79. url = discourse_api_url("users/{0}.json?".format(user))
  80. user_data = discourse_api(url).read().decode('utf-8')
  81. user_data = json.loads(user_data)
  82. if "location" in user_data["user"]:
  83. data["username"] = user
  84. data["avatar"] = user_data["user"]["avatar_template"].replace("{size}", "32")
  85. data["location"] = dict()
  86. location = get_latlon(user_data["user"]["location"])
  87. if location:
  88. data["location"]["name"] = user_data["user"]["location"]
  89. data["location"]["lat"] = location[0]["lat"]
  90. data["location"]["lon"] = location[0]["lon"]
  91. debug("In get_data, location: {0}".format(location))
  92. users_data.append(data)
  93. else:
  94. debug("In get_data, no location for {0}".format(user))
  95. time.sleep(API_SLEEP)
  96. return users_data
  97. if __name__ == "__main__":
  98. motomirko_users = get_users()
  99. usersdata = get_data(motomirko_users)
  100. print(json.dumps(usersdata, indent=2))