commit 20606f2af22d4a7e588cb1993c638acd19523f2c Author: Quantum Date: Mon Dec 3 18:42:49 2018 -0500 Initial commit: release 0.1 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dd59b3c --- /dev/null +++ b/.gitignore @@ -0,0 +1,113 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# PyCharm +/.idea + +# macOS insanity +._* + +# purge-static +.purge-static diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..bf33798 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2018 Guanzhong Chen. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..b410f47 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include MANIFEST.in +include LICENSE +include README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..f3e38de --- /dev/null +++ b/README.md @@ -0,0 +1,56 @@ +# `purge-static` [![PyPI](https://img.shields.io/pypi/v/purge-static.svg)](https://pypi.org/project/purge-static/) [![PyPI - Format](https://img.shields.io/pypi/format/purge-static.svg)](https://pypi.org/project/purge-static/) [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/purge-static.svg)](https://pypi.org/project/purge-static/) + +`purge-static` is a tool to find changed static files, show their URLs, +and optionally purge them for you on your CDN, such as CloudFlare. + +With `purge-static`, you can enable aggressive caching for your static +site on your CDN, caching the entire site on the CDN edge. 
When you update +your site, you simply need to use `purge-static` to purge only the changed +files. + +`purge-static` uses the SHA256 hash of files to determine if they changed. + +## Installation + +``` +pip install purge-static +``` + +## Example Invocation + +```sh +purge-static -d /path/to/my/webroot -u https://example.com +``` + +If your webroot is not writable, you can select a different path to write +the hash store with `--store /path/to/a/file/to/store/hashes`. + +This example ignores all `.gz` files, since they are only used for `nginx`'s +`gzip_static` module, as well as all files with hash already in the name: + +```sh +purge-static -d /path/to/my/webroot -u https://example.com \ + -i '.*\.gz$|.*-[0-9a-f]{64}\.' +``` + +For a more detailed description of the arguments, run `purge-static --help`. + +## CloudFlare + +To use CloudFlare, you will need to create a credentials file: + +```json +{ + "email": "you@example.com", + "api_key": "myverysecretapikey" +} +``` + +Then, you can invoke `purge-static`: + +```sh +purge-static -d /path/to/my/webroot -u https://example.com \ + --cloudflare -c /path/to/my/credentials -z mycloudflarezoneid +``` + +Note that `-z` takes the CloudFlare zone ID as 32 hex digits. 
class CloudFlareCDN(object):
    """CDN backend that purges URLs from CloudFlare's cache via the v4 API.

    Every configuration or API failure terminates the program through
    ``sys.exit`` with a human-readable message, because the class is driven
    directly from the command line.
    """

    # CloudFlare limits how many URLs a single purge-by-URL request may
    # list. 30 is the long-standing documented cap, so larger change sets
    # are split into several requests instead of being rejected wholesale.
    MAX_FILES_PER_REQUEST = 30

    def __init__(self, args):
        """Load and validate the CloudFlare settings.

        ``args`` is the parsed command-line namespace; its ``credentials``
        attribute names a JSON file holding the string keys ``email`` and
        ``api_key``, and its ``zone`` attribute is the CloudFlare zone ID.

        Exits the program when a setting is missing or malformed.
        """
        if not args.credentials:
            sys.exit('No credentials for CloudFlare, use --credentials.')

        try:
            with open(args.credentials) as f:
                credentials = json.load(f)
        except IOError:
            sys.exit('Cannot read credentials file: %s' % (args.credentials,))
        except ValueError:
            sys.exit('Credentials file not valid JSON: %s' % (args.credentials,))

        # Valid JSON may still be a list, string or number; only an object
        # can carry the two keys read below.
        if not isinstance(credentials, dict):
            sys.exit('In credentials file: top-level value should be an object')

        self.email = credentials.get('email')
        if not isinstance(self.email, six.string_types):
            sys.exit('In credentials file: key "email" should map to a string')

        self.api_key = credentials.get('api_key')
        if not isinstance(self.api_key, six.string_types):
            sys.exit('In credentials file: key "api_key" should map to a string')

        self.zone = args.zone
        if not self.zone:
            sys.exit('No zone for CloudFlare, use --zone.')

    @staticmethod
    def _batches(items, size):
        """Split ``items`` into consecutive lists of at most ``size`` elements."""
        return [items[start:start + size] for start in range(0, len(items), size)]

    def purge(self, urls):
        """Ask CloudFlare to drop every URL in ``urls`` from its cache.

        The URLs are sent in batches of ``MAX_FILES_PER_REQUEST``. Returns
        ``None`` once every batch has been accepted; an empty ``urls`` sends
        no request at all. Exits the program on the first network error,
        unparsable response, or response without a truthy ``success`` field
        (in which case the decoded response is the exit message).
        """
        endpoint = 'https://api.cloudflare.com/client/v4/zones/%s/purge_cache' % (self.zone,)
        headers = {
            'X-Auth-Email': self.email,
            'X-Auth-Key': self.api_key,
        }

        for batch in self._batches(list(urls), self.MAX_FILES_PER_REQUEST):
            try:
                response = requests.post(endpoint, json={'files': batch}, headers=headers)
            except requests.RequestException as e:
                sys.exit('CloudFlare purge request failed: %s' % (e,))

            # Decoded separately so that a proxy error page or other non-JSON
            # body is reported as such rather than as a connection problem.
            try:
                resp = response.json()
            except ValueError:
                sys.exit('CloudFlare returned a response that is not valid JSON '
                         '(HTTP status %s)' % (response.status_code,))

            if not resp.get('success'):
                sys.exit(resp)
# Suffixes a dbm backend may append to the shelve file name ('' is the
# bare name itself). Used to keep the hash store out of its own results.
_STORE_SUFFIXES = ('', '.db', '.dat', '.dir', '.bak', '.pag')


def _compile_regex(name, pattern):
    """Compile the regex given for option ``name``; return None if unset.

    Exits the program with a message when the pattern is invalid.
    """
    if not pattern:
        return None
    try:
        return re.compile(pattern)
    except re.error:
        sys.exit('Invalid regex for %s: %s' % (name, pattern))


def _file_digest(path, block_size=65536):
    """Return the raw SHA256 digest of the file at ``path``, read in blocks."""
    hasher = sha256()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(block_size), b''):
            hasher.update(block)
    return hasher.digest()


def _is_store_file(path, store_path):
    """Tell whether ``path`` is the hash store or one of its dbm side files."""
    candidate = os.path.abspath(path)
    base = os.path.abspath(store_path)
    return any(candidate == base + suffix for suffix in _STORE_SUFFIXES)


def _find_changes(root, store, store_path, select, ignore, indexes, url_prefix):
    """Walk ``root`` and collect every file whose hash differs from ``store``.

    Returns ``(urls, digests)``: ``urls`` lists the URLs to purge (including
    the directory URL when that directory's index file changed) and
    ``digests`` maps store keys to the new digests. ``store`` is only read,
    so the caller decides if and when the new digests are recorded.
    """
    urls = []
    digests = {}

    for dirpath, _, filenames in os.walk(root):
        relpath = os.path.relpath(dirpath, root)

        urlpath = relpath.replace(os.sep, '/')
        if os.altsep:
            urlpath = urlpath.replace(os.altsep, '/')
        # The root directory maps to the bare URL prefix.
        urlpath = '' if urlpath == os.curdir else urlpath + '/'

        changed = set()
        for filename in filenames:
            if select and not select.match(filename):
                continue
            if ignore and ignore.match(filename):
                continue

            # dirpath already starts with root (os.walk prefixes it), so it
            # must not be joined with root a second time.
            path = os.path.join(dirpath, filename)
            if _is_store_file(path, store_path):
                continue

            digest = _file_digest(path)
            if digest != store.get(path):
                digests[path] = digest
                urls.append(url_prefix + urlpath + filename)
                changed.add(filename)

        # Only the first index file present in the directory is served as
        # the directory URL, so only that one decides whether to purge it.
        for index in indexes:
            if index in filenames:
                if index in changed:
                    urls.append(url_prefix + urlpath)
                break

    return urls, digests


def main():
    """Command-line entry point: find changed static files and purge them.

    Hashes every selected file under --dir, compares it with the hash kept
    in the store, and either prints the URLs of the changed files or, when a
    CDN is selected, purges them there. New hashes are recorded only after
    the purge succeeded, so a failed purge is retried on the next run.
    --dry-run skips recording the hashes; it does not skip the purge.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=dedent('''\
            Find changed static files, show their URLs, and optionally purge them
            for you on your CDN.

            This tool can be used to enable aggressive caching for your static site.
            You can cache your entire static site on CDN edge nodes, and then use this
            tool to purge all static files that changed on disk, by file hash (SHA256).

            Currently, only CloudFlare is supported.
        '''))

    # The URL prefix is accepted positionally (the original interface) and
    # as -u/--url (the form the README and the --dir help text refer to).
    parser.add_argument('url', nargs='?', help='URL prefix corresponding to --dir')
    parser.add_argument('-u', '--url', dest='url_option', metavar='URL',
                        help='URL prefix corresponding to --dir '
                             '(alternative to the positional argument)')

    parser.add_argument('-d', '--dir', default='.',
                        help='local filesystem path corresponding to --url')
    parser.add_argument('-S', '--select',
                        help='regex to run on file names to select files to be purged, '
                             'matched from the start of the string')
    parser.add_argument('-i', '--ignore',
                        help='regex to run on file names to ignore files, '
                             'matched from the start of the string')
    parser.add_argument('-I', '--index', action='append',
                        help='file to consider as directory index (repeatable, default: index.html)')
    parser.add_argument('-s', '--store',
                        help='file to store hashes in (default: $dir/.purge-static)')
    parser.add_argument('-D', '--dry-run', action='store_true', help="dry run, don't update hashes")
    # A flag, not an option that takes a value.
    parser.add_argument('-q', '--quiet', action='store_true', help='reduce output')

    cdn_group = parser.add_argument_group(title='CDN options')
    cdn_group.add_argument('--cloudflare', dest='cdn', action='store_const', const=CloudFlareCDN,
                           help='purge files on CloudFlare CDN. need --credentials, which must be '
                                'a JSON files with two keys, email and api_key, containing your '
                                'CloudFlare account email and API key. need --zone, which must '
                                'be your CloudFlare zone ID (the hex code)')
    cdn_group.add_argument('-c', '--credentials', help='credentials file path')
    cdn_group.add_argument('-z', '--zone', help='zone ID (for CloudFlare)')

    args = parser.parse_args()

    url_prefix = args.url_option or args.url
    if not url_prefix:
        parser.error('a URL prefix is required, use --url')
    if not url_prefix.endswith('/'):
        url_prefix += '/'

    # Constructing the CDN validates its credentials before any file is read.
    cdn = args.cdn(args) if args.cdn else None

    select = _compile_regex('select', args.select)
    ignore = _compile_regex('ignore', args.ignore)
    indexes = args.index or ['index.html']
    store_path = args.store or os.path.join(args.dir, '.purge-static')

    with closing(shelve.open(store_path, protocol=2)) as store:
        urls, digests = _find_changes(
            args.dir, store, store_path, select, ignore, indexes, url_prefix)

        # Purge before touching the store: if the CDN call exits the
        # program, no hash has been recorded yet.
        if cdn and urls:
            cdn.purge(urls)

        if not args.dry_run:
            for key, digest in digests.items():
                store[key] = digest

    if cdn:
        if urls:
            if not args.quiet:
                print('Success: %d URLs purged' % len(urls))
        elif not args.quiet:
            print('Nothing to change')
    else:
        for url in urls:
            print(url)
"""Packaging script for purge-static."""
import io
import os

from setuptools import setup, find_packages

# Decode the README explicitly as UTF-8 so the build does not depend on the
# locale's default encoding; io.open behaves the same on Python 2 and 3.
readme_path = os.path.join(os.path.dirname(__file__), 'README.md')
with io.open(readme_path, encoding='utf-8') as f:
    long_description = f.read()

setup(
    name='purge-static',
    version='0.1',
    packages=find_packages(),

    # Installs the `purge-static` command, which calls purge_static.main().
    entry_points={
        'console_scripts': [
            'purge-static = purge_static:main',
        ],
    },

    author='quantum',
    author_email='quantum2048@gmail.com',
    url='https://github.com/quantum5/purge-static',
    description='Find changed static files, show their URLs, and optionally '
                'purge them for you on your CDN.',
    long_description=long_description,
    long_description_content_type='text/markdown',
    license='MIT',
    keywords='cloudflare cdn static cache purge',
    install_requires=['requests', 'six'],
    classifiers=[
        'Development Status :: 3 - Alpha',
        'Environment :: Console',
        'Intended Audience :: Developers',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: MIT License',
        'Operating System :: POSIX',
        'Operating System :: Microsoft :: Windows',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Topic :: Internet :: WWW/HTTP :: Site Management',
        'Topic :: System :: Systems Administration',
        'Topic :: Utilities',
    ],
)