#pragma once #include "daal.h" #include "algorithms/optimization_solver/objective_function/logistic_loss_types.h" #include "algorithms/optimization_solver/objective_function/logistic_loss_batch.h" #include "service_numeric_table.h" #include "service_threading.h" #include "service_math_mkl.h" #include "logistic_loss_dense_default_batch_kernel.h" using namespace daal; using namespace daal::algorithms::optimization_solver; using namespace daal::internal; using namespace daal::algorithms::optimization_solver::logistic_loss::internal; namespace logistic_prox_function { template class BatchContainer : public logistic_loss::BatchContainer { public: BatchContainer(daal::services::Environment::env* daalEnv) :logistic_loss::BatchContainer(daalEnv) {}; /** Default destructor */ virtual ~BatchContainer() {}; virtual services::Status compute() DAAL_C11_OVERRIDE; }; template static void sigmoids(algorithmFPType* exp, size_t n) { PRAGMA_IVDEP PRAGMA_VECTOR_ALWAYS for (size_t i = 0; i < n; ++i) { const auto sigm = algorithmFPType(1.0) / (algorithmFPType(1.0) + exp[i]); exp[i] = sigm; exp[i + n] = 1 - sigm; } } template daal::services::Status BatchContainer::compute() { logistic_loss::Input* input = static_cast(_in); objective_function::Result* result = static_cast(_res); logistic_loss::Parameter* parameter = static_cast(_par); daal::services::Environment::env& env = *_env; NumericTable* valueNT = nullptr; NumericTable* hessianNT = nullptr; NumericTable* gradientNT = nullptr; NumericTable* nonSmoothTermValue = nullptr; NumericTable* proximalProjection = nullptr; NumericTable* lipschitzConstant = nullptr; NumericTable* xTable = input->get(logistic_loss::data).get(); // input data set NumericTable* yTable = input->get(logistic_loss::dependentVariables).get(); // array of dependent variables NumericTable* argumentTable = input->get(logistic_loss::argument).get(); // argument of the objective function if (parameter->resultsToCompute & objective_function::value) valueNT = result->get(objective_function::valueIdx).get(); if (parameter->resultsToCompute & objective_function::hessian) hessianNT = result->get(objective_function::hessianIdx).get(); if (parameter->resultsToCompute & objective_function::gradient) gradientNT = result->get(objective_function::gradientIdx).get(); if (parameter->resultsToCompute & objective_function::nonSmoothTermValue) { nonSmoothTermValue = result->get(objective_function::nonSmoothTermValueIdx).get(); } if (parameter->resultsToCompute & objective_function::proximalProjection) { proximalProjection = result->get(objective_function::proximalProjectionIdx).get(); } if (parameter->resultsToCompute & objective_function::lipschitzConstant) { lipschitzConstant = result->get(objective_function::lipschitzConstantIdx).get(); } const size_t p = argumentTable->getNumberOfRows(); const size_t nBeta = p + 1; const size_t n = xTable->getNumberOfRows(); TArrayScalable aX(n * p); TArrayScalable aY(n); algorithmFPType* x = aX.get(); algorithmFPType* y = aY.get(); const algorithmFPType* b; HomogenNumericTable* hmgBeta = dynamic_cast*>(argumentTable); b = (*hmgBeta).getArray(); if (proximalProjection) { DAAL_ASSERT(proximalProjection->getNumberOfRows() == nBeta); algorithmFPType* prox; HomogenNumericTable* hmgProx = dynamic_cast*>(proximalProjection); prox = hmgProx->getArray(); prox[0] = b[0]; for (int i = 1; i < nBeta; i++) { if (b[i] > parameter->penaltyL1) { prox[i] = b[i] - parameter->penaltyL1; } if (b[i] < -parameter->penaltyL1) { prox[i] = b[i] + parameter->penaltyL1; } if (std::fabs(b[i]) <= parameter->penaltyL1) { prox[i] = 0; } } } if (lipschitzConstant) { DAAL_ASSERT(lipschitzConstant->getNumberOfRows() == 1); WriteRows lipschitzConstantPtr(lipschitzConstant, 0, 1); algorithmFPType& c = *lipschitzConstantPtr.get(); const size_t blockSize = 256; size_t nBlocks = n / blockSize; nBlocks += (nBlocks * blockSize != n); algorithmFPType globalMaxNorm = 0; const auto nThreads = daal::threader_get_threads_number(); TlsMem > tlsData(lipschitzConstant->getNumberOfRows()); daal::threader_for(nBlocks, nBlocks, [&](const size_t iBlock) { algorithmFPType& _maxNorm = *tlsData.local(); const size_t startRow = iBlock * blockSize; const size_t finishRow = (iBlock + 1 == nBlocks ? n : (iBlock + 1) * blockSize); algorithmFPType curentNorm = 0; for (size_t i = startRow; i < finishRow; i++) { curentNorm = 0; for (int j = 0; j < p; j++) { curentNorm += x[i * p + j] * x[i * p + j]; } if (curentNorm > _maxNorm) { _maxNorm = curentNorm; } } }); tlsData.reduce([&](algorithmFPType* maxNorm) { if (globalMaxNorm < *maxNorm) { globalMaxNorm = *maxNorm; } }); algorithmFPType alpha_scaled = algorithmFPType(parameter->penaltyL2) / algorithmFPType(n); algorithmFPType lipschitz = 0.25 * (globalMaxNorm + algorithmFPType(parameter->interceptFlag)) + alpha_scaled; algorithmFPType displacement = mkl::MklMath::sMin(2 * parameter->penaltyL2, lipschitz); c = 2 * lipschitz + displacement; } algorithmFPType nonSmoothTerm = 0; if (nonSmoothTermValue) { WriteRows vr(nonSmoothTermValue, 0, 1); DAAL_CHECK_BLOCK_STATUS(vr); algorithmFPType& v = *vr.get(); if ((parameter->penaltyL1 > 0)) { for (size_t i = 1; i < nBeta; ++i) { nonSmoothTerm += (b[i] < 0 ? -b[i] : b[i]) * parameter->penaltyL1; } } v = nonSmoothTerm; } if (valueNT || gradientNT || hessianNT) { TNArray f; TNArray sg; TArrayScalable fScalable; TArrayScalable sgScalable; algorithmFPType* fPtr; algorithmFPType* sgPtr; if (n < 16) { f.reset(n); sg.reset(2 * n); fPtr = f.get(); sgPtr = sg.get(); } else { fScalable.reset(n); sgScalable.reset(2 * n); fPtr = fScalable.get(); sgPtr = sgScalable.get(); } //f = X*b + b0 applyBetaThreaded(x, b, fPtr, n, p, parameter->interceptFlag); //s = exp(-f) vexp(fPtr, sgPtr, n); //s = sigm(f), s1 = 1 - s sigmoids(sgPtr, n); const bool bL1 = (parameter->penaltyL1 > 0); const bool bL2 = (parameter->penaltyL2 > 0); const size_t iFirstBeta = parameter->interceptFlag ? 0 : 1; const algorithmFPType div = algorithmFPType(1) / algorithmFPType(n); if (valueNT) { TArrayScalable logS(2 * n); daal::internal::Math::vLog(2 * n, sgPtr, logS.get()); const algorithmFPType* ls = logS.get(); const algorithmFPType* ls1 = ls + n; WriteRows vr(valueNT, 0, 1); DAAL_CHECK_BLOCK_STATUS(vr); algorithmFPType& value = *vr.get(); value = 0.0; for (size_t i = 0; i < n; ++i) value += y[i] * ls[i] + (algorithmFPType(1) - y[i]) * ls1[i]; value *= -div; if (bL2) { for (size_t i = 1; i < nBeta; ++i) value += b[i] * b[i] * parameter->penaltyL2; } if (bL1) { if (nonSmoothTermValue) { value += nonSmoothTerm; } else { for (size_t i = 1; i < nBeta; ++i) value += (b[i] < 0 ? -b[i] : b[i]) * parameter->penaltyL1; } } } if (gradientNT) { DAAL_ASSERT(gradientNT->getNumberOfRows() == nBeta); const algorithmFPType* s = sgPtr; algorithmFPType* g; HomogenNumericTable* hmgGrad = dynamic_cast*>(gradientNT); WriteRows gr; if (hmgGrad) { g = hmgGrad->getArray(); } else { gr.set(gradientNT, 0, nBeta); DAAL_CHECK_BLOCK_STATUS(gr); g = gr.get(); } const size_t nRowsInBlock = (p < 5000 ? 5000 / p : 1); const size_t nDataBlocks = n / nRowsInBlock + !!(n % nRowsInBlock); const auto nThreads = daal::threader_get_threads_number(); if ((nThreads > 1) && (nDataBlocks > 1)) { TlsSum tlsData(nBeta); daal::threader_for(nDataBlocks, nDataBlocks, [&](size_t iBlock) { const size_t iStartRow = iBlock * nRowsInBlock; const size_t nRowsToProcess = (iBlock == nDataBlocks - 1) ? n - iBlock * nRowsInBlock : nRowsInBlock; const auto px = x + iStartRow * p; auto pg = tlsData.local(); for (size_t i = 0; i < nRowsToProcess; ++i) { const algorithmFPType d = (s[iStartRow + i] - y[iStartRow + i]); for (size_t j = 0; j < p; ++j) pg[j + 1] += d * px[i * p + j]; } }); tlsData.reduceTo(g, nBeta); } else { for (size_t i = 0; i < nBeta; ++i) g[i] = 0.0; for (size_t i = 0; i < n; ++i) { const algorithmFPType d = (s[i] - y[i]); PRAGMA_IVDEP PRAGMA_VECTOR_ALWAYS for (size_t j = 0; j < p; ++j) g[j + 1] += d * x[i * p + j]; } } if (parameter->interceptFlag) { for (size_t i = 0; i < n; ++i) g[0] += (s[i] - y[i]); } for (size_t i = iFirstBeta; i < nBeta; ++i) g[i] *= div; if (bL2) { for (size_t i = 1; i < nBeta; ++i) g[i] += 2. * b[i] * parameter->penaltyL2; } } if (hessianNT) { DAAL_ASSERT(hessianNT->getNumberOfRows() == nBeta * nBeta); WriteRows hr(hessianNT, 0, nBeta * nBeta); DAAL_CHECK_BLOCK_STATUS(hr); algorithmFPType* h = hr.get(); algorithmFPType* s = sgPtr; for (size_t i = 0; i < n; ++i) s[i] *= s[i + n]; //sigmoid derivative at x[i] h[0] = 0; if (parameter->interceptFlag) { for (size_t i = 0; i < n; ++i) h[0] += s[i]; h[0] *= div; //average of sigmoid derivatives //first row and column for (size_t k = 1; k < nBeta; ++k) { algorithmFPType val = 0; for (size_t i = 0; i < n; ++i) val += s[i] * x[i * p + k - 1]; h[k] = val * div; h[k * nBeta] = val * div; } } else { //first row and column for (size_t k = 1; k < nBeta; ++k) { h[k] = 0; h[k * nBeta] = 0; } } //rows 1,.. for (size_t j = 1; j < nBeta; ++j) { for (size_t k = j; k < nBeta; ++k) { algorithmFPType val = 0; for (size_t i = 0; i < n; ++i) val += x[i * p + j - 1] * x[i * p + k - 1] * s[i]; h[j * nBeta + k] = val * div; h[k * nBeta + j] = val * div; } h[j * nBeta + j] += 2. * parameter->penaltyL2; } } } return services::Status(); //return logistic_loss::BatchContainer::compute(); } template class Batch : public logistic_loss::Batch { public: typedef logistic_loss::Batch super; typedef algorithms::optimization_solver::logistic_loss::Input InputType; typedef algorithms::optimization_solver::logistic_loss::Parameter ParameterType; typedef typename super::ResultType ResultType; /** * Main constructor */ Batch(size_t numberOfTerms) : logistic_loss::Batch(numberOfTerms) { initialize(); } virtual ~Batch() {} Batch(const Batch& other) : logistic_loss::Batch(other) { initialize(); } virtual int getMethod() const DAAL_C11_OVERRIDE { return(int)method; } services::SharedPtr > clone() const { return services::SharedPtr >(cloneImpl()); } services::Status allocate() { return allocateResult(); } ParameterType& parameter() { return *static_cast(_par); } const ParameterType& parameter() const { return *static_cast(_par); } //static services::SharedPtr > create(size_t numberOfTerms); protected: virtual Batch* cloneImpl() const DAAL_C11_OVERRIDE { return new Batch(*this); } //virtual services::Status allocateResult() DAAL_C11_OVERRIDE //{ // services::Status s = _result->allocate(&input, _par, (int)method); // _res = _result.get(); // return s; //} void initialize() { //Analysis::_ac = new __DAAL_ALGORITHM_CONTAINER(batch, BatchContainer, algorithmFPType, method)(&_env); Analysis::_ac = new BatchContainer(&_env); _in = &input; _par = sumOfFunctionsParameter; } public: //InputType input; }; /** @} */ }