From 5f699693083db7001538d68d843c25703f915109 Mon Sep 17 00:00:00 2001 From: Bumsoo Kim Date: Wed, 24 Jul 2024 23:44:24 +0900 Subject: [PATCH] feat: add sync to readwise.io --- .editorconfig | 33 ++ .gitignore | 73 ++++ .pre-commit-config.yaml | 46 ++ .pylintrc.toml | 557 ++++++++++++++++++++++++ Makefile | 40 ++ README.md | 44 ++ pyproject.toml | 58 +++ requirements-dev.lock | 97 +++++ requirements.lock | 60 +++ src/ridiwise/__init__.py | 1 + src/ridiwise/__main__.py | 3 + src/ridiwise/api/__init__.py | 0 src/ridiwise/api/base_client.py | 39 ++ src/ridiwise/api/browser_base_client.py | 48 ++ src/ridiwise/api/readwise.py | 59 +++ src/ridiwise/api/ridibooks.py | 230 ++++++++++ src/ridiwise/cmd/__init__.py | 0 src/ridiwise/cmd/common_option.py | 67 +++ src/ridiwise/cmd/context.py | 18 + src/ridiwise/cmd/main.py | 65 +++ src/ridiwise/cmd/sync/__init__.py | 3 + src/ridiwise/cmd/sync/main.py | 83 ++++ src/ridiwise/cmd/utils.py | 133 ++++++ 23 files changed, 1757 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 .pre-commit-config.yaml create mode 100644 .pylintrc.toml create mode 100644 Makefile create mode 100644 pyproject.toml create mode 100644 requirements-dev.lock create mode 100644 requirements.lock create mode 100644 src/ridiwise/__init__.py create mode 100644 src/ridiwise/__main__.py create mode 100644 src/ridiwise/api/__init__.py create mode 100644 src/ridiwise/api/base_client.py create mode 100644 src/ridiwise/api/browser_base_client.py create mode 100644 src/ridiwise/api/readwise.py create mode 100644 src/ridiwise/api/ridibooks.py create mode 100644 src/ridiwise/cmd/__init__.py create mode 100644 src/ridiwise/cmd/common_option.py create mode 100644 src/ridiwise/cmd/context.py create mode 100644 src/ridiwise/cmd/main.py create mode 100644 src/ridiwise/cmd/sync/__init__.py create mode 100644 src/ridiwise/cmd/sync/main.py create mode 100644 src/ridiwise/cmd/utils.py diff --git a/.editorconfig b/.editorconfig new file 
mode 100644 index 0000000..97070cb --- /dev/null +++ b/.editorconfig @@ -0,0 +1,33 @@ +root = true + +[*] +charset=utf-8 +end_of_line=lf +indent_size=4 +indent_style=space +insert_final_newline=true +trim_trailing_whitespace=true +max_line_length = 80 + +[*.py] +indent_size=4 +quote_type=single +max_line_length = 88 + +[*.json] +indent_size=2 + +[{*.yml,*.yaml}] +indent_size=2 + +[*.sh] +indent_size=2 + +[*.md] +indent_size=2 + +[*.toml] +indent_size=2 + +[{Makefile,**.mk}] +indent_style = tab diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f4c7bbc --- /dev/null +++ b/.gitignore @@ -0,0 +1,73 @@ +### Python +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# pyenv +.python-version + +# Cython debug symbols +cython_debug/ + +# venv +.venv +venv/ + +### macOS +.DS_Store +.AppleDouble +.LSOverride + +### IDE +.idea/ +.vscode/ + +### env +.env* +!.env.example diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..4aa1156 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,46 @@ +default_stages: [commit, push] + +default_language_version: + python: python3.10 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: end-of-file-fixer + - id: mixed-line-ending + - id: check-added-large-files + - id: check-toml + - id: check-yaml + + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.18 + hooks: + - id: validate-pyproject + + - repo: https://github.com/google/yamlfmt + rev: v0.13.0 + hooks: + - id: yamlfmt + args: ['-formatter', 'retain_line_breaks_single=true'] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.4 + hooks: + # Run the linter. + - id: ruff + # Run the formatter. + - id: ruff-format + args: [--check] + + - repo: local + hooks: + - id: pylint + name: pylint + entry: pylint + language: system + types: [python] + args: ["-rn", "-sn"] + files: 'src/.*\.py$' diff --git a/.pylintrc.toml b/.pylintrc.toml new file mode 100644 index 0000000..8f22fa5 --- /dev/null +++ b/.pylintrc.toml @@ -0,0 +1,557 @@ +[tool.pylint.main] +# Analyse import fallback blocks. This can be used to support both Python 2 and 3 +# compatible code, which means that the block might have code that exists only in +# one or another interpreter, leading to false positives when analysed. 
+# analyse-fallback-blocks = + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint in +# a server-like mode. +# clear-cache-post-run = + +# Always return a 0 (non-error) status code, even if lint errors are found. This +# is primarily useful in continuous integration scripts. +# exit-zero = + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +# extension-pkg-allow-list = + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +# extension-pkg-whitelist = + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +# fail-on = + +# Specify a score threshold under which the program will exit with error. +fail-under = 10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +# from-stdin = + +# Files or directories to be skipped. They should be base names, not paths. +ignore = ["CVS"] + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, it +# can't be used as an escape character. +# ignore-paths = + +# Files or directories matching the regular expression patterns are skipped. The +# regex matches against base names, not paths. 
The default value ignores Emacs +# file locks +ignore-patterns = ["^\\.#"] + +# List of module names for which member attributes should not be checked and will +# not be imported (useful for modules/projects where namespaces are manipulated +# during runtime and thus existing member attributes cannot be deduced by static +# analysis). It supports qualified module names, as well as Unix pattern +# matching. +# ignored-modules = + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +# init-hook = + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs = 1 + +# Control the amount of potential inferred values when inferring a single object. +# This can help the performance when dealing with large functions or complex, +# nested conditions. +limit-inference-results = 100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +# load-plugins = + +# Pickle collected data for later comparisons. +persistent = true + +# Resolve imports to .pyi stubs if available. May reduce no-member messages and +# increase not-an-iterable messages. +# prefer-stubs = + +# Minimum Python version to use for version dependent checks. Will default to the +# version used to run pylint. +py-version = "3.10" + +# Discover python modules and packages in the file system subtree. +# recursive = + +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +# source-roots = + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. 
+suggestion-mode = true + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +# unsafe-load-any-extension = + +[tool.pylint.basic] +# Naming style matching correct argument names. +argument-naming-style = "snake_case" + +# Regular expression matching correct argument names. Overrides argument-naming- +# style. If left empty, argument names will be checked with the set naming style. +# argument-rgx = + +# Naming style matching correct attribute names. +attr-naming-style = "snake_case" + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +# attr-rgx = + +# Bad variable names which should always be refused, separated by a comma. +bad-names = ["foo", "bar", "baz", "toto", "tutu", "tata"] + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +# bad-names-rgxs = + +# Naming style matching correct class attribute names. +class-attribute-naming-style = "any" + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +# class-attribute-rgx = + +# Naming style matching correct class constant names. +class-const-naming-style = "UPPER_CASE" + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +# class-const-rgx = + +# Naming style matching correct class names. +class-naming-style = "PascalCase" + +# Regular expression matching correct class names. Overrides class-naming-style. +# If left empty, class names will be checked with the set naming style. +# class-rgx = + +# Naming style matching correct constant names. 
+const-naming-style = "UPPER_CASE" + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming style. +# const-rgx = + +# Minimum line length for functions/classes that require docstrings, shorter ones +# are exempt. +docstring-min-length = -1 + +# Naming style matching correct function names. +function-naming-style = "snake_case" + +# Regular expression matching correct function names. Overrides function-naming- +# style. If left empty, function names will be checked with the set naming style. +# function-rgx = + +# Good variable names which should always be accepted, separated by a comma. +good-names = ["i", "j", "k", "ex", "Run", "_"] + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +# good-names-rgxs = + +# Include a hint for the correct naming format with invalid-name. +# include-naming-hint = + +# Naming style matching correct inline iteration names. +inlinevar-naming-style = "any" + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +# inlinevar-rgx = + +# Naming style matching correct method names. +method-naming-style = "snake_case" + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +# method-rgx = + +# Naming style matching correct module names. +module-naming-style = "snake_case" + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +# module-rgx = + +# Colon-delimited sets of names that determine each other's naming style when the +# name regexes allow several styles. 
+# name-group = + +# Regular expression which should only match function or class names that do not +# require a docstring. +no-docstring-rgx = "^_" + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. These +# decorators are taken in consideration only for invalid-name. +property-classes = ["abc.abstractproperty"] + +# Regular expression matching correct type alias names. If left empty, type alias +# names will be checked with the set naming style. +# typealias-rgx = + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +# typevar-rgx = + +# Naming style matching correct variable names. +variable-naming-style = "snake_case" + +# Regular expression matching correct variable names. Overrides variable-naming- +# style. If left empty, variable names will be checked with the set naming style. +# variable-rgx = + +[tool.pylint.classes] +# Warn about protected attribute access inside special methods +# check-protected-access-in-special-methods = + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods = ["__init__", "__new__", "setUp", "asyncSetUp", "__post_init__"] + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected = ["_asdict", "_fields", "_replace", "_source", "_make", "os._exit"] + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg = ["cls"] + +# List of valid names for the first argument in a metaclass class method. 
+valid-metaclass-classmethod-first-arg = ["mcs"] + +[tool.pylint.design] +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +# exclude-too-few-public-methods = + +# List of qualified class names to ignore when counting class parents (see R0901) +# ignored-parents = + +# Maximum number of arguments for function / method. +max-args = 5 + +# Maximum number of attributes for a class (see R0902). +max-attributes = 7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr = 5 + +# Maximum number of branch for function / method body. +max-branches = 12 + +# Maximum number of locals for function / method body. +max-locals = 15 + +# Maximum number of parents for a class (see R0901). +max-parents = 7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods = 20 + +# Maximum number of return / yield for function / method body. +max-returns = 6 + +# Maximum number of statements in function / method body. +max-statements = 50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods = 2 + +[tool.pylint.exceptions] +# Exceptions that will emit a warning when caught. +overgeneral-exceptions = ["builtins.BaseException", "builtins.Exception"] + +[tool.pylint.format] +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +# expected-line-ending-format = + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines = "^\\s*(# )??$" + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren = 4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string = " " + +# Maximum number of characters on a single line. +max-line-length = 88 + +# Maximum number of lines in a module. +max-module-lines = 1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. 
+# single-line-class-stmt = + +# Allow the body of an if to be on the same line as the test if there is no else. +# single-line-if-stmt = + +[tool.pylint.imports] +# List of modules that can be imported at any level, not just the top level one. +# allow-any-import-level = + +# Allow explicit reexports by alias from a package __init__. +# allow-reexport-from-package = + +# Allow wildcard imports from modules that define __all__. +# allow-wildcard-with-all = + +# Deprecated modules which should not be used, separated by a comma. +# deprecated-modules = + +# Output a graph (.gv or any supported image format) of external dependencies to +# the given file (report RP0402 must not be disabled). +# ext-import-graph = + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be disabled). +# import-graph = + +# Output a graph (.gv or any supported image format) of internal dependencies to +# the given file (report RP0402 must not be disabled). +# int-import-graph = + +# Force import order to recognize a module as part of the standard compatibility +# libraries. +# known-standard-library = + +# Force import order to recognize a module as part of a third party library. +known-third-party = ["enchant"] + +# Couples of modules and preferred modules, separated by a comma. +# preferred-modules = + +[tool.pylint.logging] +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style = "old" + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules = ["logging"] + +[tool.pylint."messages control"] +# Only show warnings with the listed confidence levels. Leave empty to show all. +# Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 
+confidence = ["HIGH", "CONTROL_FLOW", "INFERENCE", "INFERENCE_FAILURE", "UNDEFINED"] + +# Disable the message, report, category or checker with the given id(s). You can +# either give multiple identifiers separated by comma (,) or put this option +# multiple times (only on the command line, not in the configuration file where +# it should appear only once). You can also use "--disable=all" to disable +# everything first and then re-enable specific checks. For example, if you want +# to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable = [ + "raw-checker-failed", + "bad-inline-option", + "locally-disabled", + "file-ignored", + "suppressed-message", + "useless-suppression", + "deprecated-pragma", + "use-symbolic-message-instead", + "use-implicit-booleaness-not-comparison-to-string", + "use-implicit-booleaness-not-comparison-to-zero", + "missing-docstring", +] + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where it +# should appear only once). See also the "--disable" option for examples. +# enable = + +[tool.pylint.method_args] +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods = ["requests.api.delete", "requests.api.get", "requests.api.head", "requests.api.options", "requests.api.patch", "requests.api.post", "requests.api.put", "requests.api.request"] + +[tool.pylint.miscellaneous] +# List of note tags to take in consideration, separated by a comma. +notes = ["FIXME", "XXX", "TODO"] + +# Regular expression of note tags to take in consideration. 
+# notes-rgx = + +[tool.pylint.refactoring] +# Maximum number of nested blocks for function / method body +max-nested-blocks = 5 + +# Complete name of functions that never returns. When checking for inconsistent- +# return-statements if a never returning function is called then it will be +# considered as an explicit return statement and no message will be printed. +never-returning-functions = ["sys.exit", "argparse.parse_error"] + +# Let 'consider-using-join' be raised when the separator to join on would be non- +# empty (resulting in expected fixes of the type: ``"- " + " - ".join(items)``) +suggest-join-with-non-empty-separator = true + +[tool.pylint.reports] +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each category, +# as well as 'statement' which is the total number of statements analyzed. This +# score is used by the global evaluation report (RP0004). +evaluation = "max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10))" + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +# msg-template = + +# Set the output format. Available formats are: text, parseable, colorized, json2 +# (improved json format), json (old json format) and msvs (visual studio). You +# can also give a reporter class, e.g. mypackage.mymodule.MyReporterClass. +# output-format = + +# Tells whether to display a full report or only the messages. +# reports = + +# Activate the evaluation score. 
+score = true + +[tool.pylint.similarities] +# Comments are removed from the similarity computation +ignore-comments = true + +# Docstrings are removed from the similarity computation +ignore-docstrings = true + +# Imports are removed from the similarity computation +ignore-imports = true + +# Signatures are removed from the similarity computation +ignore-signatures = true + +# Minimum lines number of a similarity. +min-similarity-lines = 4 + +[tool.pylint.spelling] +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions = 4 + +# Spelling dictionary name. No available dictionaries : You need to install both +# the python package and the system dependency for enchant to work. +# spelling-dict = + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives = "fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy:" + +# List of comma separated words that should not be checked. +# spelling-ignore-words = + +# A path to a file that contains the private dictionary; one word per line. +# spelling-private-dict-file = + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +# spelling-store-unknown-words = + +[tool.pylint.typecheck] +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators = ["contextlib.contextmanager"] + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +# generated-members = + +# Tells whether missing members accessed in mixin class should be ignored. A +# class is considered mixin if its name matches the mixin-class-rgx option. 
+# Tells whether to warn about missing members when the owner of the attribute is +# inferred to be None. +ignore-none = true + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference can +# return multiple potential results while evaluating a Python object, but some +# branches might not be evaluated, which results in partial inference. In that +# case, it might be useful to still emit no-member and other checks for the rest +# of the inferred objects. +ignore-on-opaque-inference = true + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins = ["no-member", "not-async-context-manager", "not-context-manager", "attribute-defined-outside-init"] + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes = ["optparse.Values", "thread._local", "_thread._local", "argparse.Namespace"] + +# Show a hint with possible names when a member name was not found. The aspect of +# finding the hint is based on edit distance. +missing-member-hint = true + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance = 1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices = 1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx = ".*[Mm]ixin" + +# List of decorators that change the signature of a decorated function. +# signature-mutators = + +[tool.pylint.variables] +# List of additional names supposed to be defined in builtins. Remember that you +# should avoid defining new builtins when possible. +# additional-builtins = + +# Tells whether unused global variables should be treated as a violation. 
+allow-global-unused-variables = true + +# List of names allowed to shadow builtins +# allowed-redefined-builtins = + +# List of strings which can identify a callback function by name. A callback name +# must start or end with one of those strings. +callbacks = ["cb_", "_cb"] + +# A regular expression matching the name of dummy variables (i.e. expected to not +# be used). +dummy-variables-rgx = "_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_" + +# Argument names that match this expression will be ignored. +ignored-argument-names = "_.*|^ignored_|^unused_" + +# Tells whether we should check for unused import in __init__ files. +# init-import = + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules = ["six.moves", "past.builtins", "future.builtins", "builtins", "io"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1823d97 --- /dev/null +++ b/Makefile @@ -0,0 +1,40 @@ +VENV := ./.venv/bin + +.PHONY: install +install: + rye sync + $(VENV)/pre-commit install + $(VENV)/playwright install chromium + +.PHONY: lint +lint: + $(VENV)/ruff check src + $(VENV)/pylint src + +.PHONY: lint-fix +lint-fix: + $(VENV)/ruff check --fix src + +.PHONY: format +format: +# sort imports + $(VENV)/ruff check --select I --fix src +# format code + $(VENV)/ruff format --diff src || true + $(VENV)/ruff format src + +.PHONY: test +test: + $(VENV)/pytest \ + --cov-report term-missing:skip-covered \ + --cov-report html \ + --cov-report xml \ + -vvv \ + tests \ + | tee pytest-coverage.txt + +clean: + rm -rf htmlcov pytest-coverage.txt + +%: + @: diff --git a/README.md b/README.md index 1ec9ff7..b29c2fd 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,46 @@ # ridiwise + Sync Ridibooks book notes to Readwise.io + +## Installation + +### Prerequisites + +- Python 3.10 or later +- [Playwright](https://playwright.dev/python/docs/intro) + +Install playwright: +```bash +# with pip +pip install playwright + +# 
or with pipx +pipx install playwright + +# install browser +playwright install chromium +``` + +Install ridiwise: +```bash +# with pip +pip install git+https://github.com/bskim45/ridiwise.git + +# or with pipx +pipx install git+https://github.com/bskim45/ridiwise.git +``` + +## Usage + +```bash +$ ridiwise --help + Usage: ridiwise [OPTIONS] COMMAND [ARGS]... + + ridiwise: Sync Ridibooks book notes to Readwise.io + + (...) +``` + +## License + +The code is released under the MIT license. See [LICENSE](LICENSE) for details. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..158814c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,58 @@ +[project] +name = "ridiwise" +description = "Sync Ridibooks book notes to Readwise.io" +authors = [ + { name = "Bumsoo Kim", email = "bskim45@gmail.com" } +] +dependencies = [ + "typer>=0.12.3", + "httpx>=0.27.0", + "browser-cookie3>=0.19.1", + "merge-args>=0.1.5", + "playwright>=1.45.1", +] +readme = "README.md" +license = { file = "LICENSE" } +requires-python = ">= 3.10" + +dynamic = ["version"] + +[project.scripts] +"ridiwise" = "ridiwise.cmd.main:app" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.rye] +managed = true +dev-dependencies = [ + "pre-commit>=3.7.1", + "pytest>=8.3.1", + "ruff>=0.5.4", + "pylint>=3.2.6", +] + +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.wheel] +packages = ["src/ridiwise"] + +[tool.hatch.version] +path = "src/ridiwise/__init__.py" + +[tool.ruff] +line-length = 88 +indent-width = 4 +src = ["src", "tests"] + +[tool.ruff.lint] +extend-select = [ + # https://docs.astral.sh/ruff/rules/ + "E501", # line-too-long, + "I", # isort +] + +[tool.ruff.format] +quote-style = "single" diff --git a/requirements-dev.lock b/requirements-dev.lock new file mode 100644 index 0000000..6eac1cd --- /dev/null +++ b/requirements-dev.lock @@ -0,0 +1,97 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this 
lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false +# with-sources: false +# generate-hashes: false +# universal: false + +-e file:. +anyio==4.4.0 + # via httpx +astroid==3.2.4 + # via pylint +browser-cookie3==0.19.1 + # via ridiwise +certifi==2024.7.4 + # via httpcore + # via httpx +cfgv==3.4.0 + # via pre-commit +click==8.1.7 + # via typer +dill==0.3.8 + # via pylint +distlib==0.3.8 + # via virtualenv +filelock==3.15.4 + # via virtualenv +greenlet==3.0.3 + # via playwright +h11==0.14.0 + # via httpcore +httpcore==1.0.5 + # via httpx +httpx==0.27.0 + # via ridiwise +identify==2.6.0 + # via pre-commit +idna==3.7 + # via anyio + # via httpx +iniconfig==2.0.0 + # via pytest +isort==5.13.2 + # via pylint +lz4==4.3.3 + # via browser-cookie3 +markdown-it-py==3.0.0 + # via rich +mccabe==0.7.0 + # via pylint +mdurl==0.1.2 + # via markdown-it-py +merge-args==0.1.5 + # via ridiwise +nodeenv==1.9.1 + # via pre-commit +packaging==24.1 + # via pytest +platformdirs==4.2.2 + # via pylint + # via virtualenv +playwright==1.45.1 + # via ridiwise +pluggy==1.5.0 + # via pytest +pre-commit==3.7.1 +pycryptodomex==3.20.0 + # via browser-cookie3 +pyee==11.1.0 + # via playwright +pygments==2.18.0 + # via rich +pylint==3.2.6 +pytest==8.3.1 +pyyaml==6.0.1 + # via pre-commit +rich==13.7.1 + # via typer +ruff==0.5.4 +shellingham==1.5.4 + # via typer +sniffio==1.3.1 + # via anyio + # via httpx +tomlkit==0.13.0 + # via pylint +typer==0.12.3 + # via ridiwise +typing-extensions==4.12.2 + # via pyee + # via typer +virtualenv==20.26.3 + # via pre-commit diff --git a/requirements.lock b/requirements.lock new file mode 100644 index 0000000..d7298b9 --- /dev/null +++ b/requirements.lock @@ -0,0 +1,60 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: false +# with-sources: false +# generate-hashes: false +# universal: false + +-e 
file:. +anyio==4.4.0 + # via httpx +browser-cookie3==0.19.1 + # via ridiwise +certifi==2024.7.4 + # via httpcore + # via httpx +click==8.1.7 + # via typer +greenlet==3.0.3 + # via playwright +h11==0.14.0 + # via httpcore +httpcore==1.0.5 + # via httpx +httpx==0.27.0 + # via ridiwise +idna==3.7 + # via anyio + # via httpx +lz4==4.3.3 + # via browser-cookie3 +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +merge-args==0.1.5 + # via ridiwise +playwright==1.45.1 + # via ridiwise +pycryptodomex==3.20.0 + # via browser-cookie3 +pyee==11.1.0 + # via playwright +pygments==2.18.0 + # via rich +rich==13.7.1 + # via typer +shellingham==1.5.4 + # via typer +sniffio==1.3.1 + # via anyio + # via httpx +typer==0.12.3 + # via ridiwise +typing-extensions==4.12.2 + # via pyee + # via typer diff --git a/src/ridiwise/__init__.py b/src/ridiwise/__init__.py new file mode 100644 index 0000000..1f356cc --- /dev/null +++ b/src/ridiwise/__init__.py @@ -0,0 +1 @@ +__version__ = '1.0.0' diff --git a/src/ridiwise/__main__.py b/src/ridiwise/__main__.py new file mode 100644 index 0000000..31cf072 --- /dev/null +++ b/src/ridiwise/__main__.py @@ -0,0 +1,3 @@ +from ridiwise.cmd.main import app + +app(prog_name='ridiwise') diff --git a/src/ridiwise/api/__init__.py b/src/ridiwise/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ridiwise/api/base_client.py b/src/ridiwise/api/base_client.py new file mode 100644 index 0000000..24a0842 --- /dev/null +++ b/src/ridiwise/api/base_client.py @@ -0,0 +1,39 @@ +import abc +import logging +import typing + +from httpx import Auth, Client, Request, Response + + +class HTTPTokenAuth(Auth): + """Attaches HTTP Token Authentication to a given Request object.""" + + def __init__(self, keyword='Bearer', token=None): + self.keyword = keyword + self.token = token + + def auth_flow(self, request: Request) -> typing.Generator[Request, Response, None]: + request.headers['Authorization'] = f'{self.keyword} {self.token}' + 
yield request + + +class BaseClient(metaclass=abc.ABCMeta): + provider: str + base_url: str + + def __init__(self, *args, **kwargs): + self.client = Client(base_url=self.base_url, *args, **kwargs) + self.logger = logging.getLogger(name=self.provider) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.client.close() + + # pylint: disable=unnecessary-dunder-call + def start(self): + self.__enter__() + + def close(self): + self.__exit__() diff --git a/src/ridiwise/api/browser_base_client.py b/src/ridiwise/api/browser_base_client.py new file mode 100644 index 0000000..21f6bac --- /dev/null +++ b/src/ridiwise/api/browser_base_client.py @@ -0,0 +1,48 @@ +from pathlib import Path + +from playwright.sync_api import sync_playwright + +from ridiwise.api.base_client import BaseClient + + +class BrowserBaseClient(BaseClient): + storage_state_filename = 'browser_state.json' + + # pylint: disable=keyword-arg-before-vararg,fixme + # TODO: fix lint error + def __init__( + self, + cache_dir: Path, + headless: bool = True, + *args, + **kwargs, + ): + self.cache_dir = cache_dir + self.headless = headless + + self.playwright = None + self.browser = None + self.browser_context = None + + super().__init__(*args, **kwargs) + + def __enter__(self): + self.playwright = sync_playwright().start() + self.browser = self.playwright.chromium.launch(headless=self.headless) + + try: + self.browser_context = self.browser.new_context( + storage_state=self.cache_dir / self.storage_state_filename + ) + except FileNotFoundError: + self.browser_context = self.browser.new_context() + + super().__enter__() + return self + + def __exit__(self, *args): + self.browser_context.close() + self.browser.close() + self.playwright.stop() + + super().__exit__(*args) diff --git a/src/ridiwise/api/readwise.py b/src/ridiwise/api/readwise.py new file mode 100644 index 0000000..ac6c25a --- /dev/null +++ b/src/ridiwise/api/readwise.py @@ -0,0 +1,59 @@ +import json +from typing import Optional, 
TypedDict + +import httpx + +from ridiwise.api.base_client import BaseClient, HTTPTokenAuth + +API_BASE_URL = 'https://readwise.io/api/v2' + + +class CreateHighlight(TypedDict, total=False): + text: str + title: str + author: Optional[str] + category: Optional[str] + location: Optional[str] + location_type: Optional[str] + highlighted_at: Optional[str] + source_url: Optional[str] + source_type: Optional[str] + image_url: Optional[str] + note: Optional[str] + highlight_url: Optional[str] + + +class ReadwiseClient(BaseClient): + base_url = API_BASE_URL + provider = 'readwise' + + def __init__(self, token, *args, **kwargs): + if not token: + raise ValueError(f'{self.provider}: `token` must be provided') + + self.auth = HTTPTokenAuth(keyword='Token', token=token) + super().__init__(*args, **kwargs) + + def validate_token(self): + try: + response = self.client.get('/auth/', auth=self.auth) + response.raise_for_status() + return True + except httpx.HTTPStatusError as e: + if e.response.status_code == 401: + self.logger.error('Invalid Readwise token.') + return False + + raise e + + def create_highlights( + self, + highlights: list[CreateHighlight], + ): + payload = {'highlights': highlights} + + self.logger.debug(json.dumps(payload, indent=2, ensure_ascii=False)) + + response = self.client.post('/highlights/', auth=self.auth, json=payload) + response.raise_for_status() + return response.json() diff --git a/src/ridiwise/api/ridibooks.py b/src/ridiwise/api/ridibooks.py new file mode 100644 index 0000000..5e86499 --- /dev/null +++ b/src/ridiwise/api/ridibooks.py @@ -0,0 +1,230 @@ +import datetime +import http.cookiejar +import re +from typing import Optional, TypedDict +from zoneinfo import ZoneInfo + +from playwright.sync_api import ( + ElementHandle, +) +from playwright.sync_api import ( + TimeoutError as PlaywrightTimeoutError, +) + +from ridiwise.api.browser_base_client import BrowserBaseClient + +DOMAIN = 'ridibooks.com' +COOKIE_DOMAIN = f'https://{DOMAIN}' + 
+SELECTOR_LOGIN_USER_ID = 'input[placeholder="아이디"]' +SELECTOR_LOGIN_PASSWORD = 'input[placeholder="비밀번호"]' + + +# pylint: disable=import-outside-toplevel +def get_cookie_jar(browser: str) -> http.cookiejar.CookieJar: + import browser_cookie3 + + try: + cookie_jar_function = getattr(browser_cookie3, browser) + cookie_jar = cookie_jar_function(domain_name=DOMAIN) + return cookie_jar + except AttributeError as e: + raise RuntimeError(f'Browser "{browser}" is not supported.') from e + except Exception as e: + raise RuntimeError('Unable to import cookies from browser.') from e + + +class Note(TypedDict): + id: str + highlighted_text: str + memo: Optional[str] + created_date: Optional[datetime.datetime] + + +class Book(TypedDict): + book_title: str + book_url: str + book_notes_url: str + book_id: str + notes: list[Note] + authors: list[str] + + +class RidiClient(BrowserBaseClient): + base_url = f'https://{DOMAIN}' + provider = 'ridibooks' + storage_state_filename = 'browser_state_ridibooks.json' + + def __init__( + self, + user_id: str, + password: str, + *args, + **kwargs, + ): + self.user_id = user_id + self.password = password + + super().__init__(*args, **kwargs) + + @staticmethod + def extract_book_id(uri) -> Optional[str]: + """ + Extracts the book_id from a given URI. + """ + pattern = re.compile(r'/reading-note/detail/(\d+)') + match = pattern.search(uri) + if match: + return match.group(1) + return None + + @staticmethod + def parse_note_date(date_string) -> Optional[datetime.datetime]: + """ + Parses a date string in the format 'YYYY.MM.DD.' 
+ """ + pattern = re.compile(r'(\d{4})\.(\d{2})\.(\d{2})\.') + + # Use regex to find the date components + match = pattern.match(date_string) + if match: + year, month, day = map(int, match.groups()) + return datetime.datetime(year, month, day, tzinfo=ZoneInfo('Asia/Seoul')) + + return None + + def login(self): + self.logger.info('Login: `ridibooks.com`') + + with self.browser_context.new_page() as page: + page.goto( + f'{self.base_url}/account/login?return_url=https%3A%2F%2Fridibooks.com%2Faccount%2Fmyridi' # pylint: disable=line-too-long # noqa: E501 + ) + + page.wait_for_selector(SELECTOR_LOGIN_USER_ID) + + page.locator(SELECTOR_LOGIN_USER_ID).fill(self.user_id) + page.locator(SELECTOR_LOGIN_PASSWORD).fill(self.password) + + page.click('button[type="submit"]') + + try: + page.wait_for_url('**/myridi', timeout=3000) + self.cache_dir.mkdir(parents=True, exist_ok=True) + self.browser_context.storage_state( + path=self.cache_dir / self.storage_state_filename + ) + except PlaywrightTimeoutError as e: + self.logger.error('Login timeout') + raise e + + def is_authenticated(self) -> bool: + with self.browser_context.new_page() as page: + res = page.request.get(f'{self.base_url}/account/myridi', max_redirects=0) + return res.ok + + def is_cookie_authenticated(self): + return all( + next( + ( + cookie['value'] + for cookie in self.browser_context.cookies(COOKIE_DOMAIN) + if cookie['name'] == auth_key + ), + None, + ) + for auth_key in ['ridi-at', 'ridi-rt'] + ) + + def get_books_from_shelf(self) -> list[Book]: + if not self.is_authenticated(): + self.logger.info('Login required') + self.login() + + with self.browser_context.new_page() as page: + page.goto(f'{self.base_url}/reading-note/shelf') + items = page.query_selector_all('article li') + + books = [self._get_book_info_from_dom(item) for item in items] + + for book in books: + book['notes'] = self.get_notes_by_book(book['book_id']) + + return books + + def _get_book_info_from_dom(self, elem: ElementHandle) -> Book: + 
book_title = elem.query_selector('h3').inner_text() + book_ids = [ + self.extract_book_id(link.get_attribute('href')) + for link in elem.query_selector_all('a') + if link.get_attribute('href').startswith('/reading-note/detail/') + ] + + # pylint: disable=consider-using-set-comprehension + book_id_set = set([book_id for book_id in book_ids if book_id is not None]) + + if len(book_id_set) != 1: + raise ValueError('Failed to get book id') + + book_id = book_id_set.pop() + + authors = [ + link.inner_text() + for link in elem.query_selector_all('a') + if link.get_attribute('href').startswith('/author/') + ] + + return { + 'book_title': book_title, + 'book_url': f'{self.base_url}/books/{book_id}', + 'book_notes_url': f'{self.base_url}/reading-note/detail/{book_id}', + 'book_id': book_id, + 'notes': [], + 'authors': authors, + } + + def get_notes_by_book(self, book_id): + if not self.is_cookie_authenticated(): + self.login() + + with self.browser_context.new_page() as page: + page.goto(f'{self.base_url}/reading-note/detail/{book_id}') + + # pylint: disable=fixme + # TODO: Implement handling when the number of notes is very large + for _ in range(5): + try: + more_button = page.locator('article button:has-text("더보기")') + if not more_button.is_visible(timeout=500): + break + more_button.click() + except PlaywrightTimeoutError: + break + + note_items = page.query_selector_all('article li[id^="annotation_"]') + + notes = [self._get_note_from_dom(item) for item in note_items] + return notes + + def _get_note_from_dom(self, elem: ElementHandle) -> Optional[Note]: + annotation_id = elem.get_attribute('id').removeprefix('annotation_') + items = elem.query_selector_all('p') + + if not items: + return None + + highlighted_text = items[0].inner_text().strip() + + if len(items) == 3: + memo = items[1].inner_text().strip() + created_date = items[2].inner_text().strip() + else: + memo = None + created_date = items[1].inner_text().strip() + + return { + 'id': annotation_id, + 
'highlighted_text': highlighted_text, + 'memo': memo, + 'created_date': self.parse_note_date(created_date), + } diff --git a/src/ridiwise/cmd/__init__.py b/src/ridiwise/cmd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/ridiwise/cmd/common_option.py b/src/ridiwise/cmd/common_option.py new file mode 100644 index 0000000..08c703d --- /dev/null +++ b/src/ridiwise/cmd/common_option.py @@ -0,0 +1,67 @@ +import enum +from typing import Optional + +import typer + +from ridiwise.cmd.context import AuthState + + +@enum.unique +class RidiAuthMethod(enum.StrEnum): + # BROWSER_COOKIE = 'browser_cookie' + HEADLESS_BROWSER = 'headless_browser' + + +def check_common_options( + ctx: typer.Context, + auth_method: RidiAuthMethod, + user_id: Optional[str], + password: Optional[str], + headless_mode: bool, +): + auth_state: AuthState = { + 'auth_method': auth_method, + } + ctx.obj['auth'] = auth_state + + ctx.obj['headless_mode'] = headless_mode + + if auth_method == RidiAuthMethod.HEADLESS_BROWSER: + if not all([user_id, password]): + raise typer.BadParameter('`user_id` and `password` must be provided.') + + auth_state['user_id'] = user_id + auth_state['password'] = password + + +def common_params( + ctx: typer.Context, + auth_method: RidiAuthMethod = typer.Option( + default=RidiAuthMethod.HEADLESS_BROWSER, + envvar='RIDI_AUTH_METHOD', + help='Authentication method to use with Ridibooks.', + ), + user_id: Optional[str] = typer.Option( + default=None, + envvar='RIDI_USER_ID', + help='Ridibooks user ID.', + ), + password: Optional[str] = typer.Option( + default=None, + envvar='RIDI_PASSWORD', + help='Ridibooks password.', + ), + headless_mode: bool = typer.Option( + True, + envvar='HEADLESS_MODE', + help='Hide the browser window (headless mode).', + ), +): + ctx.ensure_object(dict) + check_common_options( + ctx=ctx, + auth_method=auth_method, + user_id=user_id, + password=password, + headless_mode=headless_mode, + ) diff --git a/src/ridiwise/cmd/context.py 
b/src/ridiwise/cmd/context.py new file mode 100644 index 0000000..aedb569 --- /dev/null +++ b/src/ridiwise/cmd/context.py @@ -0,0 +1,18 @@ +import logging +from pathlib import Path +from typing import Optional, TypedDict + + +class AuthState(TypedDict, total=False): + auth_method: str + user_id: Optional[str] + password: Optional[str] + + +class ContextState(TypedDict): + logger: logging.Logger + auth: AuthState + config_dir: Path + cache_dir: Path + + headless_mode: bool diff --git a/src/ridiwise/cmd/main.py b/src/ridiwise/cmd/main.py new file mode 100644 index 0000000..6b6170e --- /dev/null +++ b/src/ridiwise/cmd/main.py @@ -0,0 +1,65 @@ +import logging +from pathlib import Path +from typing import Optional + +import typer +from typing_extensions import Annotated + +from ridiwise import __version__ +from ridiwise.cmd import sync + +app = typer.Typer( + context_settings={'help_option_names': ['-h', '--help']}, + no_args_is_help=True, +) +app.add_typer( + sync.app, + name='sync', + no_args_is_help=True, +) + + +def setup_logging(log_level: int = logging.WARNING): + logging.basicConfig( + level=log_level, + ) + + +def version_callback(value: bool): + if value: + print(f'{__version__}') + raise typer.Exit() + + +@app.callback() +def main( + ctx: typer.Context, + # pylint: disable=unused-argument + version: Annotated[ + Optional[bool], + typer.Option('--version', callback=version_callback, is_eager=True), + ] = None, + config_dir: Annotated[ + Optional[Path], + typer.Option(envvar='RIDIWISE_CONFIG_DIR', help='Config home path'), + ] = '~/.config/ridiwise', + cache_dir: Annotated[ + Optional[Path], + typer.Option( + envvar='RIDIWISE_CACHE_DIR', help='Cache home path', writable=True + ), + ] = '~/.cache/ridiwise', +): + """ + ridiwise: Sync Ridibooks book notes to Readwise.io + """ + ctx.ensure_object(dict) + ctx.obj = { + 'logger': logging.getLogger('ridiwise'), + 'config_dir': Path(config_dir).expanduser(), + 'cache_dir': Path(cache_dir).expanduser(), + } + + +if 
__name__ == '__main__': + app() diff --git a/src/ridiwise/cmd/sync/__init__.py b/src/ridiwise/cmd/sync/__init__.py new file mode 100644 index 0000000..3626f49 --- /dev/null +++ b/src/ridiwise/cmd/sync/__init__.py @@ -0,0 +1,3 @@ +from .main import app + +__all__ = ['app'] diff --git a/src/ridiwise/cmd/sync/main.py b/src/ridiwise/cmd/sync/main.py new file mode 100644 index 0000000..8c4cd7c --- /dev/null +++ b/src/ridiwise/cmd/sync/main.py @@ -0,0 +1,83 @@ +import typer +from typing_extensions import Annotated + +from ridiwise.api.readwise import ReadwiseClient +from ridiwise.api.ridibooks import RidiClient +from ridiwise.cmd.common_option import common_params +from ridiwise.cmd.context import AuthState +from ridiwise.cmd.utils import with_extra_parameters + +app = typer.Typer() + + +@app.callback() +def main(): + """ + Sync Ridibooks book notes to another service. + """ + + +@app.command() +@with_extra_parameters(common_params) +def readwise( + ctx: typer.Context, + readwise_token: Annotated[ + str, + typer.Option( + envvar='READWISE_TOKEN', + help='Readwise.io API token. https://readwise.io/access_token', + ), + ], +): + """ + Sync Ridibooks book notes to Readwise.io. 
+ """ + + logger = ctx.obj['logger'] + auth_state: AuthState = ctx.obj['auth'] + + with ( + RidiClient( + user_id=auth_state['user_id'], + password=auth_state['password'], + cache_dir=ctx.obj['cache_dir'], + headless=ctx.obj['headless_mode'], + ) as ridi_client, + ReadwiseClient(token=readwise_token) as readwise_client, + ): + books = ridi_client.get_books_from_shelf() + + result_count = { + 'books': 0, + 'highlights': 0, + } + + for book in books: + readwise_client.create_highlights( + highlights=[ + { + 'text': note['highlighted_text'], + 'title': book['book_title'], + 'source_type': 'ridibooks', + 'category': 'books', + 'author': ', '.join(book['authors']), + 'highlighted_at': note['created_date'].isoformat(), + 'note': note['memo'], + 'source_url': book['book_url'], + 'highlight_url': f'{book["book_notes_url"]}#annotation_{note["id"]}', # noqa: E501 pylint: disable=line-too-long + } + for note in book['notes'] + ] + ) + + result_count['books'] += 1 + result_count['highlights'] += len(book['notes']) + + logger.info( + 'Created Readwise highlights: ' + f"`{book['book_title']}` / {len(book['notes'])}" + ) + + print('Synced notes to Readwise.io:') + print('Books: ', result_count['books']) + print('Highlights: ', result_count['highlights']) diff --git a/src/ridiwise/cmd/utils.py b/src/ridiwise/cmd/utils.py new file mode 100644 index 0000000..469c919 --- /dev/null +++ b/src/ridiwise/cmd/utils.py @@ -0,0 +1,133 @@ +import asyncio +import sys +from functools import wraps +from inspect import Parameter, Signature, signature +from operator import itemgetter +from typing import Sequence + + +def typer_async(f): + @wraps(f) + def wrapper(*args, **kwargs): + return asyncio.run(f(*args, **kwargs)) + + return wrapper + + +def is_help_option(): + return any(['-h' in sys.argv, '--help' in sys.argv]) + + +def merge_signatures( + first: Signature, second: Signature, *, drop: Sequence[str] = None +) -> Signature: + """ + 
https://github.com/tiangolo/typer/issues/153#issuecomment-2016834465 + + Merge two signatures. + + Returns a new signature where the parameters of the second signature have been + injected in the first if they weren't already there (i.e. same name not found). + + The following rules are used: + - parameter order is preserved, with parameters from the first signature coming + first, and parameters from the second one coming after + - when a parameter (same name) is found in both signatures, the parameter from + the first signature is kept, but if its default value is Ellipsis or missing, or + its annotation is missing, it is replaced with the one coming from the second + parameter. + - parameters in the second signature whose names appear in the drop list are not + taken into account + + Once this is done, we do not have a valid signature. The following extra steps + are performed: + - move all positional-only parameters first. Positional-only parameters will + still be ordered together, but some parameters from the second signature will + now appear before parameters from the first signature (the non-positional-only + ones). + - make sure we have at most one variadic parameter of each kind (keyword and + non-keyword). They can appear in both original signatures, but only under the + same name. Otherwise a ValueError is raised. + - move keyword-only parameters last (just before the variadic keyword parameter) + - keyword-only parameters are left as is. It does not seem to be a problem if + some of them have default values and appear before other keyword-only parameters + without a default value. + + The result is still not a valid signature, as we could have some positional-only + parameter with a default value, followed by a non-keyword or positional parameter + without a default value. In this case, a ValueError will be raised. 
+ """ + params = dict(first.parameters) + + if drop is None: + drop = [] + + for n, p1 in second.parameters.items(): + if n in drop: + continue + + if p0 := params.get(n): + if p0.default is Ellipsis or p0.default is Parameter.empty: + p0 = p0.replace(default=p1.default) + if p0.annotation is Parameter.empty: + p0 = p0.replace(annotation=p1.annotation) + params[n] = p0 + else: + params[n] = p1 + + # Sort params by kind, moving params with default value after params without + # default value within each kind group. + params = sorted( + params.values(), + key=lambda p: 2 * p.kind + bool(p.default != Parameter.empty), + ) + + # Will raise if signature not valid + return first.replace(parameters=params) + + +def with_extra_parameters(extra, *, drop: Sequence[str] = None): + """ + https://github.com/tiangolo/typer/issues/153#issuecomment-2016834465 + + Append extra parameters to Typer command. + """ + if drop is None: + drop = [] + + def wrapper(command): + s0 = signature(extra) + s1 = signature(command) + s2 = merge_signatures(s1, s0, drop=drop) + + # Correct dispatch with variadic args and PO parameters would be more tricky. + # Don't konw if we have such use cases with typer. + assert not any( + p.kind + in ( + Parameter.POSITIONAL_ONLY, + Parameter.VAR_POSITIONAL, + Parameter.VAR_KEYWORD, + ) + for p in s2.parameters.values() + ) + + n0 = list(p.name for p in s0.parameters.values() if p.name not in drop) + n1 = list(p.name for p in s1.parameters.values()) + a0 = itemgetter(*n0) + a1 = itemgetter(*n1) + + @wraps(command) + def wrapped(*args, **kwargs): + b2 = s2.bind(*args, **kwargs) + b2.apply_defaults() + + # Process extra parameters + extra(**dict(zip(n0, a0(b2.arguments)))) + # Invoke typer command + return command(**dict(zip(n1, a1(b2.arguments)))) + + setattr(wrapped, '__signature__', s2) + return wrapped + + return wrapper