Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid segment fault in ASIC/SDK health event handling when vendor SAI passes an invalid timestamp #3446

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion orchagent/switchorch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1115,8 +1115,22 @@ void SwitchOrch::onSwitchAsicSdkHealthEvent(sai_object_id_t switch_id,
const string &severity_str = switch_asic_sdk_health_event_severity_reverse_map.at(severity);
const string &category_str = switch_asic_sdk_health_event_category_reverse_map.at(category);
string description_str;
const std::time_t &t = (std::time_t)timestamp.tv_sec;
std::time_t t = (std::time_t)timestamp.tv_sec;
const std::time_t now = std::time(0);
const double year_in_seconds = 86400 * 365;
stringstream time_ss;

/*
* In case vendor SAI passed a very large timestamp, put_time can cause segment fault which can not be caught by try/catch infra
* We check the difference between the timestamp from SAI and the current time and force to use current time if the gap is too large
* By doing so, we can avoid the segment fault
*/
if (difftime(t, now) > year_in_seconds)
{
SWSS_LOG_ERROR("Invalid timestamp second %" PRIx64 " in received ASIC/SDK health event, reset to current time", timestamp.tv_sec);
t = now;
}
Comment on lines +1128 to +1132
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think this could be make as static helper function to correct_time etc, and it would be easier to test something like this


time_ss << std::put_time(std::localtime(&t), "%Y-%m-%d %H:%M:%S");

switch (data.data_type)
Expand Down
69 changes: 45 additions & 24 deletions tests/mock_tests/switchorch_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,44 @@ namespace switchorch_test
gSwitchOrch = new SwitchOrch(m_app_db.get(), switch_tables, stateDbSwitchTable);
}

void checkAsicSdkHealthEvent(const sai_timespec_t &timestamp, const string &expected_key="")
{
initSwitchOrch();

sai_switch_health_data_t data;
memset(&data, 0, sizeof(data));
data.data_type = SAI_HEALTH_DATA_TYPE_GENERAL;
vector<uint8_t> data_from_sai({100, 101, 115, 99, 114, 105, 112, 116, 105, 245, 111, 110, 245, 10, 123, 125, 100, 100});
sai_u8_list_t description;
description.list = data_from_sai.data();
description.count = (uint32_t)(data_from_sai.size() - 2);
on_switch_asic_sdk_health_event(gSwitchId,
SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL,
timestamp,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
data,
description);

string key;
if (expected_key.empty())
{
vector<string> keys;
gSwitchOrch->m_asicSdkHealthEventTable->getKeys(keys);
key = keys[0];
}
else
{
key = expected_key;
}
string value;
gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "category", value);
ASSERT_EQ(value, "firmware");
gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "severity", value);
ASSERT_EQ(value, "fatal");
gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "description", value);
ASSERT_EQ(value, "description\n{}");
}

void TearDown() override
{
::testing_db::reset();
Expand Down Expand Up @@ -289,30 +327,13 @@ namespace switchorch_test

TEST_F(SwitchOrchTest, SwitchOrchTestHandleEvent)
{
initSwitchOrch();

sai_timespec_t timestamp = {.tv_sec = 1701160447, .tv_nsec = 538710245};
sai_switch_health_data_t data;
memset(&data, 0, sizeof(data));
data.data_type = SAI_HEALTH_DATA_TYPE_GENERAL;
vector<uint8_t> data_from_sai({100, 101, 115, 99, 114, 105, 112, 116, 105, 245, 111, 110, 245, 10, 123, 125, 100, 100});
sai_u8_list_t description;
description.list = data_from_sai.data();
description.count = (uint32_t)(data_from_sai.size() - 2);
on_switch_asic_sdk_health_event(gSwitchId,
SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL,
timestamp,
SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
data,
description);

string key = "2023-11-28 08:34:07";
string value;
gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "category", value);
ASSERT_EQ(value, "firmware");
gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "severity", value);
ASSERT_EQ(value, "fatal");
gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "description", value);
ASSERT_EQ(value, "description\n{}");
checkAsicSdkHealthEvent(timestamp, "2023-11-28 08:34:07");
}

TEST_F(SwitchOrchTest, SwitchOrchTestHandleEventInvalidTimeStamp)
{
sai_timespec_t timestamp = {.tv_sec = 172479515853275099, .tv_nsec = 538710245};
checkAsicSdkHealthEvent(timestamp);
}
}
Loading