You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
65 lines
1.9 KiB
Python
65 lines
1.9 KiB
Python
#!/bin/python3
|
|
# A way to make HTTP requests
|
|
import requests
|
|
# For nth function
|
|
from itertools import islice
|
|
from sys import argv
|
|
# HTML parser
|
|
try:
|
|
from BeautifulSoup import BeautifulSoup
|
|
except ImportError:
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Get the nth element from an iterable.
def nth(iterable, n, default=None):
    """Return the item at zero-based position ``n`` of ``iterable``.

    The iterable is advanced up to and including position ``n``. If it runs
    out before reaching that position, ``default`` is returned instead.
    """
    return next(islice(iterable, n, None), default)
|
|
|
|
# Check arguments
if len(argv) < 2:
    # No output path was passed: show the usage text and stop with status 1.
    print('You need to pass a file path to write the emojis list')
    print('Example: ' + argv[0] + ' emojis.txt')
    # Raise SystemExit directly: the bare ``exit`` helper is injected by the
    # ``site`` module and is not available under ``python -S`` or when frozen.
    raise SystemExit(1)

# Destination file the emoji list is written to.
path = argv[1]
|
|
|
|
# Get the HTML
# The timeout keeps the script from hanging forever on a stalled connection.
request = requests.get(
    "http://www.unicode.org/emoji/charts/emoji-list.html",
    timeout=30,
)
# Stop on an HTTP error status instead of parsing an error page.
request.raise_for_status()

# Parse it
parsed_html = BeautifulSoup(request.text, features="lxml")

# Find all rows
rows = parsed_html.body.find_all('tr')

# Let's remove rows with headers from the list
for row in rows:
    # Get the first child - either everything is a <th> or a <td> anyways.
    # The None default covers a row that has no children at all.
    first_child = next(iter(row.children), None)
    if getattr(first_child, "name", None) == "th":
        # Remove the tag; the row object stays in ``rows`` but is marked as
        # decomposed so that later processing can skip it.
        row.decompose()
|
|
|
|
# The output format: emoji, emoji_main_name (more_emoji_names)
output_format = "{} {} ({})\n"
# Format for the names of the emojis.
name_format = ":{}:"


def _shortcode(label):
    """Format ``label`` as ``:label:`` with spaces as underscores and colons removed."""
    return name_format.format(label.replace(' ', '_').replace(':', ''))


# Parse the data
lines = []
for row in rows:
    # Handle decomposed rows (the header rows removed earlier)
    if row.decomposed:
        continue
    # Remove "⊛ " from the emoji name that is added to newly added emoji names
    name = _shortcode(nth(row.children, 6).text.replace('⊛ ', ''))
    # Alternative names are separated by ' | ' in their cell.
    alt_names = ', '.join(
        _shortcode(n) for n in nth(row.children, 8).text.split(' | ')
    )
    lines.append(output_format.format(row.img.attrs.get('alt'), name, alt_names))

# The output string, which then will be written into a file.
# Joined once rather than grown with ``+=`` on every row.
output = "".join(lines)

# Explicit UTF-8 so the emoji characters can be written regardless of the
# platform's default encoding; ``with`` closes the file even if writing fails.
with open(path, 'w', encoding='utf-8') as out_file:
    out_file.write(output)
|