ソースを参照

Z991239-5406 #comment 修复握手异常和lost事件同时处理的缺陷;握手异常和lost处理最大不超过30(默认值,可配置)

oilyang 1年前
コミット
9aaeb2862a

+ 1 - 0
Module/mod_healthmanager/HealthManagerFSM.cpp

@@ -108,6 +108,7 @@ ErrorCodeEnum CHealthManagerFSM::Initial()
 }
 ErrorCodeEnum CHealthManagerFSM::OnInit(void)
 {
+	DbgWithLink(LOG_LEVEL_DEBUG, LOG_TYPE_SYSTEM)("Complied at: %s %s", __DATE__, __TIME__);
 	return Initial();
 }
 ErrorCodeEnum CHealthManagerFSM::OnExit(void)

+ 34 - 28
Module/mod_healthmanager/mod_healthmanager.cpp

@@ -350,14 +350,11 @@ ErrorCodeEnum CHealthManagerEntity::RestartModule(const char* pEntityName)
 	if (it == m_modRunInfo.end())
 	{
 		DbgWithLink(LOG_LEVEL_INFO,LOG_TYPE_SYSTEM)("add %s to modfuninfo(%d).",pEntityName,m_modRunInfo.size());
-		m_modRunInfo[pEntityName].count = 1;
 		m_modRunInfo[pEntityName].dwStart = GetTickCountRVC();
-
 	}
 	else
 	{
-		(m_modRunInfo[pEntityName].count)++;
-		DbgWithLink(LOG_LEVEL_INFO, LOG_TYPE_SYSTEM)("modruninfo EntityName=%s, count=%d", pEntityName, m_modRunInfo[pEntityName].count);
+		DbgWithLink(LOG_LEVEL_INFO, LOG_TYPE_SYSTEM)("modruninfo EntityName=%s", pEntityName);
 	}
 	CSmartPointer<IAsynWaitSp> spWait;
 	ErrorCodeEnum eErrCode  = Error_Succeed;
@@ -880,15 +877,15 @@ bool CHealthManagerEntity::DoRestart()
 	ToCalcRebootHourAndMinute(tmpHourBegin, tmpHourEnd);
 
 	do {
-		m_maxLostTimes = 30;
+		m_maxAbnormalTimes = 30;
 		int value(0);
-		spCerConfig->ReadConfigValueInt(GetEntityName(), "MaxLostTimes", value);
+		spCerConfig->ReadConfigValueInt(GetEntityName(), "MaxAbnormalTimes", value);
 		if (value > 0) {
-			m_maxLostTimes = value;
+			m_maxAbnormalTimes = value;
 		}
 	} while (false);
 
-	DbgWithLink(LOG_LEVEL_INFO, LOG_TYPE_SYSTEM)("MaxLostTimes:%d", m_maxLostTimes);
+	DbgWithLink(LOG_LEVEL_INFO, LOG_TYPE_SYSTEM)("MaxAbnormalTimes:%d", m_maxAbnormalTimes);
 	do {
 		int value(0);
 		spCerConfig->ReadConfigValueInt(GetEntityName(), "StopSelfCheck", value);
@@ -1783,9 +1780,11 @@ void CHealthManagerEntity::OnEntityStateHook(const char* pszEntityName, const ch
 			LogWarn(Severity_High, Error_Unexpect, HealthManager_UserErrorCode_EntityLost_Start + esi.wEntityDevelopID, tmpWarnMsg.GetData());
 			DbgWithLink(LOG_LEVEL_WARN, LOG_TYPE_SYSTEM).setLogCode("QLR0402501Z001").setResultCode(csResultCode.GetData())(tmpWarnMsg.GetData());
 			//doing nothing before enter main page. HandShake will take it.
-			if (m_bEnterMainPageEver && m_modRunInfo[pszEntityName].lostCount < m_maxLostTimes)
+			//AND if selfcheck is doing, no action at all
+			if (m_bEnterMainPageEver && m_modRunInfo[pszEntityName].abnormalCount < m_maxAbnormalTimes && !m_modRunInfo[pszEntityName].bAbnormalBusy)
 			{
-				m_modRunInfo[pszEntityName].lostCount++;
+				m_modRunInfo[pszEntityName].bAbnormalBusy = true;
+				m_modRunInfo[pszEntityName].abnormalCount++;
 				EntityLostProcTask* task;
 				//oiltmp@20240415 to be delete after x months
 				if (_strnicmp("CardIssuer", pszEntityName, strlen("CardIssuer")) == 0)
@@ -1849,23 +1848,27 @@ void CHealthManagerEntity::OnAnswer(CSmartPointer<IAsynWaitSp> pAsynWaitSp)
 	new_entry->state = entry->state;
 
 	if (new_entry->op == Test_ShakeHand && new_entry->ErrorResult != Error_Succeed) {
-		DbgWithLink(LOG_LEVEL_WARN, LOG_TYPE_SYSTEM)("shake hand %s turns out %s, entity state: %s", (LPCTSTR)new_entry->EntityName, SpStrError(new_entry->ErrorResult), SpStrEntityState((EntityStateEnum)new_entry->state));
-		//oiltmp@20240415 to be delete after x months
-		if (_strnicmp("CardIssuer", new_entry->EntityName.GetData(), strlen("CardIssuer")) == 0)
+		DbgWithLink(LOG_LEVEL_WARN, LOG_TYPE_SYSTEM)("shake hand %s turns out %s, entity state: %s, bBusy:%d", (LPCTSTR)new_entry->EntityName
+			, SpStrError(new_entry->ErrorResult), SpStrEntityState((EntityStateEnum)new_entry->state), m_modRunInfo[new_entry->EntityName].bAbnormalBusy);
+		if (!m_modRunInfo[new_entry->EntityName].bAbnormalBusy)
 		{
-			if (m_sysStaticInfo.strMachineType.Compare("RVC.CardStore") == 0 || m_sysStaticInfo.strMachineType.Compare("RVC.CardPrinter") == 0)
+			//oiltmp@20240415 to be delete after x months
+			if (_strnicmp("CardIssuer", new_entry->EntityName.GetData(), strlen("CardIssuer")) == 0)
 			{
-				DbgWithLink(LOG_LEVEL_DEBUG, LOG_TYPE_SYSTEM)("ExceptionErrorProcess CardIssuerStore");
-				ExceptionErrorProcess("CardIssuerStore", new_entry->ErrorResult);
+				if (m_sysStaticInfo.strMachineType.Compare("RVC.CardStore") == 0 || m_sysStaticInfo.strMachineType.Compare("RVC.CardPrinter") == 0)
+				{
+					DbgWithLink(LOG_LEVEL_DEBUG, LOG_TYPE_SYSTEM)("ExceptionErrorProcess CardIssuerStore");
+					ExceptionErrorProcess("CardIssuerStore", new_entry->ErrorResult);
+				}
+				else
+				{
+					DbgWithLink(LOG_LEVEL_DEBUG, LOG_TYPE_SYSTEM)("ExceptionErrorProcess CardIssuerStand");
+					ExceptionErrorProcess("CardIssuerStand", new_entry->ErrorResult);
+				}
 			}
 			else
-			{
-				DbgWithLink(LOG_LEVEL_DEBUG, LOG_TYPE_SYSTEM)("ExceptionErrorProcess CardIssuerStand");
-				ExceptionErrorProcess("CardIssuerStand", new_entry->ErrorResult);
-			}
+				ExceptionErrorProcess((const char*)new_entry->EntityName, new_entry->ErrorResult);
 		}
-		else
-			ExceptionErrorProcess((const char*)new_entry->EntityName, new_entry->ErrorResult);
 	}
 }
 
@@ -1881,7 +1884,9 @@ ErrorCodeEnum CHealthManagerEntity::ExceptionErrorProcess(const char* pszEntityN
 	case Error_Unexpect:
 	case Error_InvalidState:
 	{
-		m_modRunInfo[pszEntityName].bBusy = true;
+		m_modRunInfo[pszEntityName].bAbnormalBusy = true;
+		DbgWithLink(LOG_LEVEL_INFO, LOG_TYPE_SYSTEM)("ExceptionErrorProcess %s, abnormalCount:%d", pszEntityName, m_modRunInfo[pszEntityName].abnormalCount);
+		m_modRunInfo[pszEntityName].abnormalCount++;
 		eErrCode = pFuncPrivilege->StopEntity(pszEntityName, spWait);
 		if (eErrCode == Error_Succeed)
 		{
@@ -1920,7 +1925,7 @@ ErrorCodeEnum CHealthManagerEntity::ExceptionErrorProcess(const char* pszEntityN
 			DbgWithLink(LOG_LEVEL_WARN, LOG_TYPE_SYSTEM)("(re)Start %s failed(%d).", pszEntityName, eErrCode);
 			break;
 		}
-		m_modRunInfo[pszEntityName].bBusy = false;
+		m_modRunInfo[pszEntityName].bAbnormalBusy = false;
 	}
 	break;
 	default:
@@ -1987,6 +1992,7 @@ void CHealthManagerEntity::OnSelfCheckTimeout()
 	//非"A":其他启动失败导致进关门页,不再对硬件以及其他没必要自检的实体进行自检
 	CSimpleStringA tmpTerminalStage("");
 	GetFunction()->GetSysVar("TerminalStage", tmpTerminalStage);
+
 	for (int i = 0; i < activeEnCount; ++i)
 	{
 		CSimpleStringA pszEntityName(m_activeEntity[i]);
@@ -2007,15 +2013,15 @@ void CHealthManagerEntity::OnSelfCheckTimeout()
 				)
 				continue;
 		}
-		if (m_modRunInfo[pszEntityName].lostCount < m_maxLostTimes || !m_modRunInfo[pszEntityName].bBusy)//oilyang@20240524 lost too much times,no shake hand
+		//oilyang@20240524 abnormal within MAX limited times, AND NOT in lost process, else no check at all
+		if (m_modRunInfo[pszEntityName].abnormalCount < m_maxAbnormalTimes && !m_modRunInfo[pszEntityName].bAbnormalBusy)
 			CheckEntity(m_activeEntity[i].GetData(), Test_ShakeHand);
 	}
 	GetFunction()->ResetTimer(HEALTHMANAGER_SELFCHECK_TIMER_ID, HEALTHMANAGER_SELFCHECK_TIMER_INTERVAL);
 }
 void CHealthManagerEntity::EntityLostProc(CSimpleStringA entityName)
 {
-	m_modRunInfo[entityName].bBusy = true;
-	DbgWithLink(LOG_LEVEL_INFO, LOG_TYPE_SYSTEM)("EntityLostProc %s", entityName.GetData());
+	DbgWithLink(LOG_LEVEL_INFO, LOG_TYPE_SYSTEM)("EntityLostProc %s, lost %d times from VTM started", entityName.GetData(), m_modRunInfo[entityName].abnormalCount);
 	CSmartPointer<IEntityFunction> pFunc = GetFunction();
 	CSmartPointer<IEntityFunctionPrivilege> pFuncPrivilege = pFunc.ConvertCase<IEntityFunctionPrivilege>();
 	CSmartPointer<IAsynWaitSp> spWait;
@@ -2041,7 +2047,7 @@ void CHealthManagerEntity::EntityLostProc(CSimpleStringA entityName)
 		DbgWithLink(LOG_LEVEL_WARN, LOG_TYPE_SYSTEM)("TokenKeeper lost");
 		m_fsm.ToReAccessAuth();
 	}
-	m_modRunInfo[entityName].bBusy = false;
+	m_modRunInfo[entityName].bAbnormalBusy = false;
 }
 SP_BEGIN_ENTITY_MAP()
 	SP_ENTITY(CHealthManagerEntity)

+ 5 - 6
Module/mod_healthmanager/mod_healthmanager.h

@@ -44,9 +44,8 @@ enum DealType
 struct ModuleRunInfo
 {
 	DWORD dwStart;
-	int count;
-	int lostCount;
-	bool bBusy;
+	int abnormalCount;
+	bool bAbnormalBusy;
 };
 struct RestartModeInfo
 {
@@ -198,8 +197,8 @@ public:
 					m_activeEntity = ctx->Req.entityList.Split('|');
 					for (int i = 0; i < m_activeEntity.GetCount(); ++i)
 					{
-						m_modRunInfo[m_activeEntity[i]].lostCount = 0;
-						m_modRunInfo[m_activeEntity[i]].bBusy = false;
+						m_modRunInfo[m_activeEntity[i]].abnormalCount = 0;
+						m_modRunInfo[m_activeEntity[i]].bAbnormalBusy = false;
 					}
 				}
 			}
@@ -268,7 +267,7 @@ private:
 		, m_uuidRemoteController, m_uuidGUIConsole, m_uuidHeartBeat, m_uuidIE, m_uuidCenterS, m_uuidVtmLoader;
 	CUUID m_uuidPublic, m_uuid4SIPPhone;
 	int m_stopSelfCheck;
-	int m_restartHour, m_restartMinute, m_lastHour, m_preDay,m_guardianCount, m_maxLostTimes;
+	int m_restartHour, m_restartMinute, m_lastHour, m_preDay,m_guardianCount, m_maxAbnormalTimes;
 	bool m_bInit, m_bWaitRestartPC, m_bScreenLock, m_bSayIdle
 		, m_bGuardianRun, m_bVerRollback, m_bBrowserIdleFirst
 		, m_bInMainPage, m_bEnterMainPageEver, m_bHaveThrowMainPage, m_bToRestartByCenterSetting;