You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

65 lines
1.9 KiB
Python

#!/bin/python3
# A way to make HTTP requests
import requests
# For nth function
from itertools import islice
from sys import argv
# HTML parser
try:
from BeautifulSoup import BeautifulSoup
except ImportError:
from bs4 import BeautifulSoup
# Get the nth element from an iterable.
def nth(iterable, n, default=None):
    """Return the item at zero-based position ``n`` of ``iterable``.

    The iterable is consumed up to and including position ``n``. If it has
    fewer than ``n + 1`` items, ``default`` is returned instead.
    ``n`` must be a non-negative integer (``islice`` rejects negative values).
    """
    # islice skips the first n items lazily; next() then takes the following
    # one, falling back to `default` when the iterable is exhausted.
    return next(islice(iterable, n, None), default)
# Check arguments: the script needs exactly one thing from the command line,
# the path of the file the emoji list is written to.
if len(argv) < 2:
    # No arguments were passed.
    # Raising SystemExit with a string prints that string to stderr and exits
    # with status 1. Unlike the exit() helper, it does not depend on the
    # `site` module having been loaded.
    raise SystemExit(
        'You need to pass a file path to write the emojis list\n'
        'Example: ' + argv[0] + ' emojis.txt'
    )
# Destination file for the generated list
path = argv[1]
# Get the HTML. A timeout keeps the script from hanging forever if the
# server never answers.
request = requests.get(
    "http://www.unicode.org/emoji/charts/emoji-list.html",
    timeout=30,
)
# Stop on HTTP errors instead of parsing an error page into an empty list
request.raise_for_status()
# Parse it
parsed_html = BeautifulSoup(request.text, features="lxml")
# Find all rows
rows = parsed_html.body.find_all('tr')
# Let's remove rows with headers from the list
for row in rows:
    # Get the first child - either everything is a <th> or a <td> anyways.
    # A default of None covers a <tr> that has no children at all.
    child = next(iter(row.children), None)
    if child is None or child.name == "th":
        # Remove the tag; later code recognises it via `row.decomposed`
        row.decompose()
# The output format: emoji, emoji_main_name (more_emoji_names)
output_format = "{} {} ({})\n"
# Format for the names of the emojis.
name_format = ":{}:"
# Marker ("⊛ ", U+229B followed by a space) that is added in front of newly
# added emoji names. Written as an escape so it survives encoding mishaps.
new_emoji_marker = "\u229b "


def _shortcode(text):
    """Turn an emoji name into ``:short_code:`` form.

    Spaces become underscores and colons are dropped so that the result
    contains exactly the two colons supplied by ``name_format``.
    """
    return name_format.format(text.replace(' ', '_').replace(':', ''))


# Parse the data, collecting one formatted line per emoji row
lines = []
for row in rows:
    # Handle decomposed rows
    if row.decomposed:
        continue
    # Child 6 is used as the main-name cell and child 8 as the cell with the
    # ' | '-separated alternative names.
    # NOTE(review): these indexes presumably count whitespace text nodes
    # between the <td> cells - confirm against the current page layout.
    name = _shortcode(nth(row.children, 6).text.replace(new_emoji_marker, ''))
    alt_names = ', '.join(
        _shortcode(alt) for alt in nth(row.children, 8).text.split(' | ')
    )
    lines.append(output_format.format(row.img.attrs.get('alt'), name, alt_names))
# The output string, which then will be written into a file.
output = "".join(lines)
# Emoji cannot be encoded in many locale default encodings, so write UTF-8
# explicitly; `with` closes the file even if the write fails.
with open(path, 'w', encoding='utf-8') as out_file:
    out_file.write(output)