A Cloud Function can be run daily to retrieve the list of available media tags from GTM, making it possible to identify which tags are available and which ones are not being triggered. All data is stored in a BigQuery table and can enrich media quality analysis.
This tutorial serves as a guide to getting data about tags, triggers, etc. from the Google Tag Manager API. The idea is to use HTTP requests to access the data instead of using Python client libraries.
⚠️ The data will be stored in a BigQuery table. This feature is optional and can improve report analysis.
There are some steps that need to be followed:
- 1.1 Create a GCP project;
- 1.2 Activate the Tag Manager API in `APIs & Services > Enabled APIs & services`;
- 1.3 Go to `APIs & Services > Credentials > + Create Credentials > Service Account` and create a service account. Don't forget to save the credentials in a JSON file (optional);
- 1.4 Create an API key in `APIs & Services > Credentials > + Create Credentials > API key`. It's important to restrict the key to only "Tag Manager API" (Figure 1).
- Using the available code, create a Cloud Function (using Python 3.10);
- Schedule the Cloud Function execution. Google Cloud Scheduler can be used to schedule a daily execution of the Cloud Function.
To connect GTM with the services from GCP it's important to add the service account associated with the Cloud Function as a GTM user.
Figure 2 - Add service account as GTM user

The data obtained from GTM is stored in BigQuery. For example, the following were created: the dataset media_quality
and the table media-quality-gtm-tags
.
Table created in BigQuery:
Column Name | Description |
---|---|
account_id | Google Tag Manager account ID |
container_id | Google Tag Manager container ID |
firing_trigger_id | ID of the tag's first firing trigger |
workspace_id | Google Tag Manager workspace ID |
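name | Tag name |
tracking_id | Value of the `tracking_id` key in the tag's monitoring metadata ("undefined" when absent) |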
tag_id | Tag ID |
tag_type | Tag type |
snapshot_date | Date of snapshot |
timestamp | Date and time of data insertion |
To create the table in BigQuery, use the Python function below:
def bq_create_media_tags_table(project_name, dataset_name, table_name, client) -> None:
    r"""Create the BigQuery dataset and the media-tags table if they do not exist.

    Both statements use ``IF NOT EXISTS``, so calling this function again is
    harmless when the dataset and table are already present.

    Args:
        project_name (string): name of the GCP project
        dataset_name (string): name of the BigQuery dataset
        table_name (string): name of the table
        client: BigQuery client instance; only its ``query`` method is used
    """
    # Backtick-quote the identifiers: project and table names may contain
    # hyphens (e.g. "media-quality-gtm-tags"), which are not valid in
    # unquoted SQL identifiers.
    dataset_ref = f"`{project_name}.{dataset_name}`"
    table_ref = f"`{project_name}.{dataset_name}.{table_name}`"

    create_dataset_sql = f"CREATE SCHEMA IF NOT EXISTS {dataset_ref}"
    create_table_sql = (
        f"CREATE TABLE IF NOT EXISTS {table_ref} ("
        " account_id STRING,"
        " container_id STRING,"
        " firing_trigger_id STRING,"
        " workspace_id STRING,"
        " name STRING,"
        " tracking_id STRING,"
        " tag_id STRING,"
        " tag_type STRING,"
        " snapshot_date DATE,"
        " timestamp TIMESTAMP )"
    )

    for statement in (create_dataset_sql, create_table_sql):
        # query() only submits the job; result() waits for it to finish, so
        # the dataset exists before the table is created and any SQL error
        # is raised here instead of being silently dropped.
        client.query(statement).result()
The following code is used in the Cloud Function to list the GTM tags and keep only the "Media Tags":
import datetime
import json
import os

import google.auth.transport.requests
import requests
from google.auth import default
from google.cloud import bigquery
from google.oauth2 import service_account
# ------------------------------------
# True  -> use the runtime's default GCP credentials (deployed as a Cloud Function).
# False -> load credentials from SERVICE_ACCOUNT_FILE (running locally).
RUN_AS_CLOUD_FUNCTION = True
# ------------------------------------
"""
HOW TO RUN LOCALLY
$ pip install functions-framework
$ functions-framework --target main
Which will start a local development server at http://localhost:8080.
To invoke it locally for an HTTP function:
$ curl http://localhost:8080
For a background function with non-binary data:
$ curl -d '{"data": {"hi": "there"}}' -X POST \
-H "Content-Type: application/json" \
http://localhost:8080
"""
# Every setting below can be overridden with an environment variable of the
# name shown, so the function can be reconfigured without editing the source.
# The literal values are only fallbacks used when the variable is not set.

# Authentication settings
SCOPES = ['https://www.googleapis.com/auth/tagmanager.readonly', "https://www.googleapis.com/auth/cloud-platform"]
# Path to the service-account JSON key; only read when RUN_AS_CLOUD_FUNCTION is False.
SERVICE_ACCOUNT_FILE = os.environ.get("GTM_SERVICE_ACCOUNT_FILE", './credentials/teste-gtm-api-3629bf94ffcb.json')  # optional
# NOTE(review): API keys are secrets and should not be committed to source
# control. Prefer setting GTM_API_KEY in the environment; if the fallback
# below is a real key, rotate it.
API_KEY = os.environ.get("GTM_API_KEY", "AIzaSyBvG_HlAWbu3iLrGCA91jJ13REDFZRR588")  # Change values
# GTM info
GTM_ACCOUNT = os.environ.get("GTM_ACCOUNT", "21165")  # Change values
GTM_CONTAINER = os.environ.get("GTM_CONTAINER", "22238")  # Change values
GTM_WORKSPACE = os.environ.get("GTM_WORKSPACE", "1000120")  # Change values
# GCP info
PROJECT_NAME = os.environ.get("BQ_PROJECT_NAME", "dp6-raft-suite")  # Change values
DATASET_NAME = os.environ.get("BQ_DATASET_NAME", "dp6_media_quality")  # Change values
TABLE_NAME = os.environ.get("BQ_TABLE_NAME", "media-quality-gtm-tags")  # Change values
def bq_insert_to_table(data, table_id, client) -> None:
    r"""Insert rows into a BigQuery table and print the outcome.

    Args:
        data (list of dict): rows to be inserted into the table
        table_id (string): BigQuery table id in the format
            <projectId>.<datasetId>.<tableName>
        client: BigQuery client instance; its ``get_table`` and
            ``insert_rows`` methods are used
    """
    if not data:
        # Nothing to send. Skipping also avoids a request with an empty row
        # list, which the streaming insert API is expected to reject.
        print("No rows to insert.")
        return

    table_obj = client.get_table(table_id)
    errors = client.insert_rows(table=table_obj, rows=data)
    if not errors:
        print("New rows have been added.")
    else:
        print("Encountered errors while inserting rows: {}".format(errors))
def _get_credentials():
    r"""Build GCP credentials for the configured scopes and refresh their token.

    When RUN_AS_CLOUD_FUNCTION is true, the credentials come from the GCP
    default credentials of the environment. Otherwise they are loaded from
    the service-account JSON file at SERVICE_ACCOUNT_FILE.

    Returns:
        the credentials object, after ``refresh`` has been called on it
    """
    if RUN_AS_CLOUD_FUNCTION:
        # default() returns a (credentials, project) pair; only the first is used
        creds, _project = default(scopes=SCOPES)
    else:
        creds = service_account.Credentials.from_service_account_file(
            SERVICE_ACCOUNT_FILE, scopes=SCOPES
        )

    # Refresh through an authenticated transport request before returning
    creds.refresh(google.auth.transport.requests.Request())
    return creds
def list_tags(gtm_account, gtm_container, gtm_workspace, api_key, token):
    r"""List all tags of a GTM workspace, following pagination.

    Args:
        gtm_account (string): Google Tag Manager account number
        gtm_container (string): Google Tag Manager container number
        gtm_workspace (string): Google Tag Manager workspace number
        api_key (string): API key created in GCP to interact with the GTM API
        token (string): OAuth bearer token sent in the Authorization header

    Returns:
        dict: decoded JSON response; the tags of every page are merged under
        the "tag" key (the key is absent when the workspace has no tags)

    Raises:
        RuntimeError: if a request fails, times out, returns an HTTP error
            status, or the response body is not valid JSON
    """
    endpoint = (
        "https://tagmanager.googleapis.com/tagmanager/v2"
        f"/accounts/{gtm_account}/containers/{gtm_container}"
        f"/workspaces/{gtm_workspace}/tags"
    )
    headers = {"Accept": "application/json", "Authorization": f"Bearer {token}"}
    # Let requests build and encode the query string instead of formatting the URL by hand
    params = {"key": api_key}

    merged = None
    while True:
        try:
            # Make a HTTP GET request; the timeout keeps a stalled connection
            # from hanging the function until the platform kills it
            response = requests.get(url=endpoint, headers=headers, params=params, timeout=30)
            # Turn 4xx/5xx answers into errors rather than returning an error
            # payload that has no "tag" key
            response.raise_for_status()
            page = response.json()
        except (requests.exceptions.RequestException, ValueError) as err:
            raise RuntimeError("Failed to list GTM tags") from err

        if merged is None:
            merged = page
        else:
            merged.setdefault("tag", []).extend(page.get("tag", []))

        # The API signals further pages with nextPageToken
        next_page_token = page.get("nextPageToken")
        if not next_page_token:
            break
        params["pageToken"] = next_page_token

    # All pages were consumed, so the token of the first page is stale
    merged.pop("nextPageToken", None)
    return merged
def _parse_media_tags(list_of_tags):
r"""Filter media tags and parse data
Args:
list_of_tags (json): dictionary with all tags
Output:
json with parsed data for media tags
"""
media_json_list = []
current_date = datetime.datetime.now()
current_date_formatted = current_date.strftime("%Y-%m-%d")
for tag in list_of_tags["tag"]:
add_to_list = False
tracking_id = "undefined"
json_sanity_check = ("monitoringMetadata" in tag) and ("map" in tag["monitoringMetadata"])
if json_sanity_check == True:
for param in tag["monitoringMetadata"]["map"]:
if param.get("key") == "exclude" and param.get("value") == "false":
add_to_list = True
if param.get("key") == "tracking_id":
tracking_id = param["value"]
if add_to_list:
reduced_json = { "account_id": tag["accountId"],
"container_id": tag["containerId"],
"firing_trigger_id": tag["firingTriggerId"][0],
"workspace_id": tag["workspaceId"],
"name": tag["name"],
"tracking_id": tracking_id,
"tag_id": tag["tagId"],
"tag_type": tag["type"],
"snapshot_date": current_date_formatted,
"timestamp": current_date }
media_json_list.append(reduced_json)
return media_json_list
def main(request):
    r"""Entry point: fetch the GTM tags, keep the media tags and store them in BigQuery.

    Args:
        request: incoming request object; it is not read by this function

    Returns:
        the tuple ("Success", 200) when RUN_AS_CLOUD_FUNCTION is true,
        otherwise None
    """
    # Credentials supply both the bearer token for GTM and the BigQuery client auth
    credentials = _get_credentials()
    token = credentials.token
    bq_client = bigquery.Client(credentials=credentials)

    # Fetch every tag from Google Tag Manager (GTM), then keep only the media tags
    list_of_tags = list_tags(
        gtm_account=GTM_ACCOUNT,
        gtm_container=GTM_CONTAINER,
        gtm_workspace=GTM_WORKSPACE,
        api_key=API_KEY,
        token=token,
    )
    media_json_list = _parse_media_tags(list_of_tags)

    # Store the parsed rows in the destination BigQuery table
    destination_table = f"{PROJECT_NAME}.{DATASET_NAME}.{TABLE_NAME}"
    bq_insert_to_table(data=media_json_list, table_id=destination_table, client=bq_client)

    if not RUN_AS_CLOUD_FUNCTION:
        return None
    return "Success", 200
Contents of the requirements.txt file:
Authlib==1.0.1
gcloud==0.18.3
google-api-core==2.8.2
google-auth==2.9.1
google-cloud-bigquery==3.3.2
google-cloud-bigquery-storage==2.14.2
google-cloud-core==2.3.2
google-cloud-storage==2.4.0
google-crc32c==1.3.0
google-resumable-media==2.3.3
googleapis-common-protos==1.56.4
ipython==8.4.0
matplotlib==3.5.2
numpy==1.23.1
requests==2.28.1
PyYAML==6.0
The GTM API returns a list of tags in JSON format. For example, the following JSON contains information about three tags:
{
"tag": [
{
"accountId": "6054543647",
"consentSettings": {
"consentStatus": "notSet"
},
"containerId": "92108926",
"fingerprint": "1661519637804",
"firingTriggerId": ["2147479553"],
"monitoringMetadata": {
"map": [
{
"key": "exclude",
"type": "template",
"value": "false"
},
{
"key": "tracking_id",
"type": "template",
"value": "1123"
},
{
"key": "media_name",
"type": "template",
"value": "Hello"
}
],
"type": "map"
},
"monitoringMetadataTagNameKey": "name",
"name": "Say Hello",
"parameter": [
{
"key": "html",
"type": "template",
"value": "<script>\nconsole.log(\"HELLO!!!\");\n</script>"
},
{
"key": "supportDocumentWrite",
"type": "boolean",
"value": "false"
}
],
"path": "accounts/6054543647/containers/92108926/workspaces/2/tags/3",
"tagFiringOption": "oncePerEvent",
"tagId": "3",
"tagManagerUrl": "https://tagmanager.google.com/#/container/accounts/6054543647/containers/92108926/workspaces/2/tags/3?apiLink=tag",
"type": "html",
"workspaceId": "2"
},
{
"accountId": "6054543647",
"consentSettings": {
"consentStatus": "notSet"
},
"containerId": "92108926",
"fingerprint": "1661516539340",
"firingTriggerId": ["2147479553"],
"monitoringMetadata": {
"type": "map"
},
"name": "Tag agora VAI",
"parameter": [
{
"key": "html",
"type": "template",
"value": "<script>console.log(\"AGORA VAI DISPARADA!\");</script>"
},
{
"key": "supportDocumentWrite",
"type": "boolean",
"value": "false"
}
],
"path": "accounts/6054543647/containers/92108926/workspaces/2/tags/4",
"tagFiringOption": "oncePerEvent",
"tagId": "4",
"tagManagerUrl": "https://tagmanager.google.com/#/container/accounts/6054543647/containers/92108926/workspaces/2/tags/4?apiLink=tag",
"type": "html",
"workspaceId": "2"
},
{
"accountId": "6054543647",
"consentSettings": {
"consentStatus": "notSet"
},
"containerId": "92108926",
"fingerprint": "1661517732659",
"firingTriggerId": ["6"],
"monitoringMetadata": {
"type": "map"
},
"name": "DQ - MEDIA QUALITY - CF",
"parameter": [
{
"key": "fetchReference",
"type": "template",
"value": "codigo do fetch"
},
{
"key": "clientId",
"type": "template",
"value": "11234567"
},
{
"key": "cfEndpoint",
"type": "template",
"value": "https://www.meuendpointwwwwwwwwwwwwwwwwwwwwwwwwwww.cloudfunctions.com"
},
{
"key": "method",
"type": "template",
"value": "post"
},
{
"key": "typeEndpoint",
"type": "template",
"value": "cf"
},
{
"key": "autoCollect",
"type": "boolean",
"value": "true"
},
{
"key": "domain",
"type": "template",
"value": "www.dp6.com.br"
},
{
"key": "sample",
"type": "template",
"value": "100"
}
],
"path": "accounts/6054543647/containers/92108926/workspaces/2/tags/7",
"tagFiringOption": "oncePerEvent",
"tagId": "7",
"tagManagerUrl": "https://tagmanager.google.com/#/container/accounts/6054543647/containers/92108926/workspaces/2/tags/7?apiLink=tag",
"type": "cvt_92108926_5",
"workspaceId": "2"
}
]
}