From 5a930c7a2d1fb397cf1ba9f3e210b50f8688fabf Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 13 Jan 2025 18:46:45 +0400 Subject: [PATCH] Add a non-whitespace pattern to SearchQueriesParam. (#74) * Add a non-whitespace pattern to SearchQueriesParam. * Fix the regex. * Improve the regex. * Add a validation test for serp. --- tests/test_serp.py | 28 +++++++++++++++++++++++++++ zyte_spider_templates/spiders/serp.py | 1 + 2 files changed, 29 insertions(+) diff --git a/tests/test_serp.py b/tests/test_serp.py index 4dde596..9b80cd7 100644 --- a/tests/test_serp.py +++ b/tests/test_serp.py @@ -1,6 +1,7 @@ from urllib.parse import quote_plus import pytest +from pydantic import ValidationError from scrapy import Request from scrapy_spider_metadata import get_spider_metadata from scrapy_zyte_api.responses import ZyteAPITextResponse @@ -288,6 +289,7 @@ def test_metadata(): {"type": "null"}, ], "description": "Input 1 search query per line (e.g. foo bar).", + "pattern": r"(.|\r?\n)*\S+(.|\r?\n)*", "title": "Search Queries", "widget": "textarea", }, @@ -764,3 +766,29 @@ def test_item_type_mappings(): # Also ensure that no dict value is repeated. assert len(actual_keys) == len(set(ITEM_TYPE_CLASSES.values())) + + +@pytest.mark.parametrize( + "input_data,raises", + [ + ({"search_queries": "foo"}, False), + ({"search_queries": "foo "}, False), + ({"search_queries": " foo "}, False), + ({"search_queries": " fo o "}, False), + ({"search_queries": "fo o"}, False), + ({"search_queries": "fo\n o "}, False), + ({"search_queries": ["fo", " o "]}, False), + ({"search_queries": ["fo", " "]}, False), + ({"search_queries": " "}, True), + ({"search_queries": ""}, True), + ({"search_queries": " "}, True), + ({"search_queries": " \n "}, True), + ({"search_queries": [" ", " "]}, True), + ], +) +def test_query_validation(input_data, raises): + if raises: + with pytest.raises(ValidationError): + GoogleSearchSpider(**input_data) + else: + GoogleSearchSpider(**input_data) diff --git a/zyte_spider_templates/spiders/serp.py b/zyte_spider_templates/spiders/serp.py index 6ee001e..b6162a6 100644 --- a/zyte_spider_templates/spiders/serp.py +++ b/zyte_spider_templates/spiders/serp.py @@ -93,6 +93,7 @@ class SearchQueriesParam(BaseModel): description="Input 1 search query per line (e.g. foo bar).", json_schema_extra={ "widget": "textarea", + "pattern": r"(.|\r?\n)*\S+(.|\r?\n)*", }, )