Skip to content

Commit

Permalink
working playwright?
Browse files Browse the repository at this point in the history
  • Loading branch information
MatthewZMSU committed Nov 21, 2024
1 parent a537f6b commit bbdf23d
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 100 deletions.
11 changes: 5 additions & 6 deletions scrapypuppeteer/browser_managers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,15 @@

class BrowserManager(ABC):
@abstractmethod
def _download_request(self, request: Request, spider) -> Union[Coroutine, Request]:
...
def _download_request(
self, request: Request, spider
) -> Union[Coroutine, Request]: ...

@abstractmethod
async def _start_browser_manager(self) -> None:
...
async def _start_browser_manager(self) -> None: ...

@abstractmethod
async def _stop_browser_manager(self) -> None:
...
async def _stop_browser_manager(self) -> None: ...

def download_request(self, request: Request, spider) -> Union[Deferred, Request]:
coro_or_request = self._download_request(request, spider)
Expand Down
27 changes: 20 additions & 7 deletions scrapypuppeteer/browser_managers/browser_downloader_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,29 @@

from scrapypuppeteer import CloseContextRequest
from scrapypuppeteer.browser_managers import BrowserManager
from scrapypuppeteer.browser_managers.playwright_browser_manager import PlaywrightBrowserManager
from scrapypuppeteer.browser_managers.playwright_browser_manager import (
PlaywrightBrowserManager,
)

# from scrapypuppeteer.browser_managers.pyppeteer_browser_manager import PyppeteerBrowserManager
from scrapypuppeteer.browser_managers.service_browser_manager import ServiceBrowserManager
from scrapypuppeteer.browser_managers.service_browser_manager import (
ServiceBrowserManager,
)
from scrapypuppeteer.request import ActionRequest


class BrowserDownloaderHandler(HTTPDownloadHandler):
"""
docstring: TODO
docstring: TODO
"""

EXECUTION_METHOD_SETTING = "EXECUTION_METHOD"

def __init__(self, settings, browser_manager: BrowserManager, crawler=None) -> None:
super().__init__(settings, crawler=crawler)
verify_installed_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
verify_installed_reactor(
"twisted.internet.asyncioreactor.AsyncioSelectorReactor"
)

self.browser_manager = browser_manager

Expand All @@ -41,11 +48,17 @@ def from_crawler(cls, crawler: Crawler):
case "playwright":
browser_manager = PlaywrightBrowserManager()
case _:
raise ValueError(f"Invalid execution method: {execution_method.upper()}")
raise ValueError(
f"Invalid execution method: {execution_method.upper()}"
)

bdh = cls(settings, browser_manager, crawler=crawler)
crawler.signals.connect(bdh.browser_manager.start_browser_manager, signals.engine_started)
crawler.signals.connect(bdh.browser_manager.stop_browser_manager, signals.engine_stopped)
crawler.signals.connect(
bdh.browser_manager.start_browser_manager, signals.engine_started
) # This makes the start VERY slow
crawler.signals.connect(
bdh.browser_manager.stop_browser_manager, signals.engine_stopped
)
return bdh

def download_request(self, request, spider):
Expand Down
Loading

0 comments on commit bbdf23d

Please sign in to comment.