JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. 
C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. 
C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. 
C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-}JFIFICC_PROFILElcmsmntrRGB XYZ  acspMSFTsawsctrl-hand=@=@t," desc_cprt wtptrXYZ,gXYZ@bXYZTrTRCh`gTRCh`bTRCh`descuRGBtextCC0XYZ TXYZ o8XYZ bXYZ $curv*|uN  bj. C$)j.~39?FWM6Tv\dluV~,6۾ewC    #%$""!&+7/&)4)!"0A149;>>>%.DIC;C  ;("(;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<" }!1AQa"q2#BR$3br %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz w!1AQaq"2B #3Rbr $4%&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz ?|WH?cS?Ne.r˿ޱ5\YYhFOejT7PZ[qs2c/$Ep[Gqo(Nù=QHci;OipX=Ģ8d^mQeӴm1OsL/x2];i6p!zU -/uX!=<-} .
LIBYA CYBER ARMY
Logo of a company Instagram@3g86    Server : Apache
System : Linux uta-edu.server.ly 4.18.0-513.11.1.el8_9.x86_64 #1 SMP Wed Jan 17 02:00:40 EST 2024 x86_64
User : utripoli ( 1001)
PHP Version : 7.4.33
Disable Function : NONE
Directory :  /home/utripoli/public_html/alqalam/lib/pkp/tools/

Upload File :
current_dir [ Writeable ] document_root [ Writeable ]

 

Current File : /home/utripoli/public_html/alqalam/lib/pkp/tools/convertApacheAccessLogFile.php
<?php

/**
 * @file tools/convertApacheAccessLogFile.php
 *
 * Copyright (c) 2022 Simon Fraser University
 * Copyright (c) 2022 John Willinsky
 * Distributed under the GNU GPL v3. For full terms see the file docs/COPYING.
 *
 * @class ConvertApacheAccessLogFile
 *
 * @ingroup tools
 *
 * @brief CLI tool to copy, prepare and convert apache access log file into the new format needed for stats reprocessing.
 *
 * The file will be copied to the {files_dir}/usageStats/tmp/ folder,
 * only entries related to the current installation will be filtered,
 * the file will be split by day,
 * renamed into apache_usage_events_YYYYMMDD.log,
 * converted to the new format,
 * and copied into the {files_dir}/usageStats/archive/ folder.
 *
 * Special cases from the release 2.x are handled as following:
 *
 * Issue Galley:
 * with PDF viewer:
 * issue/viewIssue/issueId/galleyId followed by issue/viewFile/issueId/galleyId
 * -> only issue/viewFile/issueId/galleyId will be considered.
 * There is also only issue/download/issueId/galleyId (when download link is used).
 * without PDF viewer:
 * issue/viewIssue/issueId/galleyId will not be considered because the file is actually not downloaded.
 * But issue/download/issueId/galleyId will be considered.
 *
 * PDF Galley:
 * article/view/articleId/galleyId followed by article/viewFile/articleId/galleyId
 * -> only article/viewFile/articleId/galleyId will be considered.
 * There is also only article/download/articleId/galleyId (when download link is used).
 * without PDF viewer:
 * article/view/articleId/galleyId will not be considered because the file is actually not downloaded.
 * But article/download/articleId/galleyId will be considered.
 *
 * HTML Galley:
 * article/view/articleId/galleyId followed by article/viewFile/articleId/galleyId
 * -> only article/viewFile/articleId/galleyId will be considered.
 *
 * Other and Remote Galley:
 * article/view/articleId/galleyId
 *
 * Supp File:
 * article/downloadSuppFile/articleId/galleyId
 */

require(dirname(__FILE__, 4) . '/tools/bootstrap.php');

use APP\core\Application;
use APP\facades\Repo;
use APP\statistics\StatisticsHelper;
use PKP\cliTool\ConvertLogFileTool;
use PKP\context\Context;
use PKP\db\DAORegistry;
use PKP\file\FileManager;
use PKP\statistics\PKPStatisticsHelper;
use PKP\submission\Genre;
use PKP\task\FileLoader;

class ConvertApacheAccessLogFile extends ConvertLogFileTool
{
    /**
     * Path to the egrep program, required for this tool to work, e.g. '/bin/egrep'
     *
     * The program is invoked through exec() when filtering the log file and
     * when splitting it by day.
     */
    public const EGREP_PATH = '/bin/egrep';

    /**
     * Whether the URL parameters are used instead of CGI PATH_INFO.
     * This is the former variable 'disable_path_info' in the config.inc.php
     *
     * This needs to be set to true if the URLs in the old log file contain the parameters as URL query string.
     */
    public const PATH_INFO_DISABLED = false;

    /**
     * Regular expression that is used for parsing the apache access log file.
     *
     * The default regex can parse apache access log file in combined format
     * ("%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"").
     *
     * If the apache log file is in a different format the correct regex needs to be entered here, so
     * that ip, date, url, returnCode, and userAgent can be extracted,
     * see also PHP subpatterns naming: https://www.php.net/manual/en/regexp.reference.subpatterns.php
     */
    public const PARSEREGEX = '/^(?P<ip>\S+) \S+ \S+ \[(?P<date>.*?)\] "\S+ (?P<url>\S+).*?" (?P<returnCode>\S+) \S+ ".*?" "(?P<userAgent>.*?)"/';

    /**
     * PHP format of the time in the log file.
     * See https://www.php.net/manual/en/datetime.format.php
     *
     * The default format can parse the apache access log file combined format ([day/month/year:hour:minute:second zone]).
     *
     * If the time in the apache log file is in a different format the correct PHP format needs to be entered here.
     */
    // TO-DO: ask how to deal with timezone, do we need it?
    public const PHP_DATETIME_FORMAT = 'd/M/Y:H:i:s O';

    /**
     * PHP format of the date (without time and timezone)
     *
     * splitFileByDay() formats each day with this format and passes the result
     * to egrep as the search pattern, so it has to match the way the day is
     * written in the log lines.
     */
    public const PHP_DATE_FORMAT = 'd/M/Y';

    /**
     * Constructor.
     *
     * Validates the command line and the environment before any work is done:
     * - exactly one or two arguments must be present in $this->argv, otherwise
     *   the usage text is printed and the process exits with code 8;
     * - the program configured in EGREP_PATH must exist and be executable,
     *   otherwise an error is written to STDERR and the process exits with code 9.
     *
     * @param array $argv command-line arguments
     */
    public function __construct(array $argv = [])
    {
        parent::__construct($argv);
        $argumentCount = count($this->argv);
        if ($argumentCount < 1 || $argumentCount > 2) {
            $this->usage();
            exit(8);
        }

        // This tool needs egrep path configured: abort when the configured
        // program is missing or cannot be executed (not when it is present).
        if (!is_file(self::EGREP_PATH) || !is_executable(self::EGREP_PATH)) {
            fwrite(STDERR, 'Error: This tool needs egrep program. Please define the constatn EGREP_PATH in this script, enter there the path to egrep command on your machine.' . PHP_EOL);
            exit(9);
        }
    }

    /**
     * Get the temporary working folder of this tool: the usage stats
     * directory reported by PKPStatisticsHelper with '/tmp' appended.
     */
    public function getLogFileDir(): string
    {
        return PKPStatisticsHelper::getUsageStatsDirPath() . '/tmp';
    }

    /**
     * Get the regular expression used to parse a log line (the PARSEREGEX constant).
     */
    public function getParseRegex(): string
    {
        return self::PARSEREGEX;
    }

    /**
     * Get the PHP date/time format of the timestamps in the log file
     * (the PHP_DATETIME_FORMAT constant).
     */
    public function getPhpDateTimeFormat(): string
    {
        return self::PHP_DATETIME_FORMAT;
    }

    /**
     * Whether the URLs in the log file carry their parameters as query string
     * instead of PATH_INFO (the PATH_INFO_DISABLED constant).
     */
    public function isPathInfoDisabled(): bool
    {
        return self::PATH_INFO_DISABLED;
    }

    /**
     * Whether the processed file is an apache access log file.
     * Always true for this tool.
     */
    public function isApacheAccessLogFile(): bool
    {
        return true;
    }

    /**
     * Output the help text of this command: a description of what the tool
     * does, followed by the invocation synopsis built from the script name.
     */
    public function usage()
    {
        // The description keeps its embedded line breaks exactly as written.
        $description = "\nConvert the passed apache access log file into the new usage stats log file format.
This will copy the apache access file to the usageStats/tmp/ folder in the files directory,
filter entries related to this installation, split the file by day, rename the result file(s)
into apache_usage_events_YYYYMMDD.log, convert them into the new JSON format, and
copy them to usageStats/archive/ folder.
Must run under user with enough privilegies to read access apache log files.\n";
        $synopsis = "  Usage: php {$this->scriptName} [path/to/apache/log/file.log]\n\n";

        echo $description . $synopsis;
    }

    /**
     * Entry point of the tool: (re)create the temporary working folder and
     * hand the log file given on the command line over to processAccessLogFile().
     *
     * Exits with code 10 when the working folder cannot be created and with
     * code 11 when the given log file is not accessible.
     */
    public function execute(): void
    {
        $fileManager = new FileManager();
        $sourceLogPath = current($this->argv);
        $workDir = $this->getLogFileDir();

        // Always start from an empty working folder.
        if ($fileManager->fileExists($workDir, 'dir')) {
            $fileManager->rmtree($workDir);
        }
        if (!$fileManager->mkdir($workDir)) {
            fwrite(STDERR, "Error: Can't create folder " . $workDir . PHP_EOL);
            exit(10);
        }

        if (!$fileManager->fileExists($sourceLogPath)) {
            fwrite(STDERR, "Error: File {$sourceLogPath} don't exist or can't be accessed." . PHP_EOL);
            exit(11);
        }

        $this->processAccessLogFile($sourceLogPath);

        // Do not remove tmp/ folder here -- it could be used by admins for checking and debugging
    }

    /**
     * Run the whole conversion pipeline for one access log file:
     * copy it into usageStats/tmp/, keep only the entries of this
     * installation, split them by day, then convert and archive every
     * daily file. When the source file name ends in 'gz', each archived
     * daily file is gzip-compressed as well.
     */
    public function processAccessLogFile(string $filePath)
    {
        $workingCopyPath = $this->copyFile($filePath);
        $relevantEntriesPath = $this->filterFile($workingCopyPath);
        $dayFileNames = $this->splitFileByDay($relevantEntriesPath);

        $fileManager = new FileManager();
        $sourceIsGzipped = pathinfo($filePath, PATHINFO_EXTENSION) == 'gz';

        foreach ($dayFileNames as $dayFileName) {
            $this->convert($dayFileName);
            $this->archive($dayFileName);

            if (!$sourceIsGzipped) {
                continue;
            }

            // Compress the archived copy, mirroring the compressed source file.
            $archivedPath = implode('/', [
                StatisticsHelper::getUsageStatsDirPath(),
                FileLoader::FILE_LOADER_PATH_ARCHIVE,
                $dayFileName,
            ]);
            $fileManager->gzCompressFile($archivedPath);
        }
    }

    /**
     * Copy the access log file into the folder usageStats/tmp/, keeping its
     * base name. Exits with code 12 when the copy fails.
     *
     * @return string Path of the copy inside the temporary folder.
     */
    public function copyFile(string $filePath): string
    {
        $destinationPath = $this->getLogFileDir() . '/' . pathinfo($filePath, PATHINFO_BASENAME);

        $copied = (new FileManager())->copyFile($filePath, $destinationPath);
        if (!$copied) {
            fwrite(STDERR, "Could not copy file from {$filePath} to {$destinationPath}." . PHP_EOL);
            exit(12);
        }

        echo "File {$filePath} copied to {$destinationPath}.\n";
        return $destinationPath;
    }

    /**
     * Filter the access log file entries related to this installation, i.e.
     * those that contain one of the existing context paths.
     * The filtered entries are saved into a new file with the ending _tmp.
     *
     * A file whose name ends in 'gz' is decompressed first; a decompression
     * failure ends the process with exit code 13. Exit code 14 is used when
     * egrep cannot be executed, and exit code 15 when no entry of the log
     * file matches any context path.
     *
     * @return string Path of the file that contains the filtered entries.
     */
    public function filterFile(string $filePath): string
    {
        $fileMgr = new FileManager();
        if (pathinfo($filePath, PATHINFO_EXTENSION) == 'gz') {
            try {
                $filePath = $fileMgr->gzDecompressFile($filePath);
            } catch (Exception $e) {
                fwrite(STDERR, $e->getMessage() . PHP_EOL);
                exit(13);
            }
        }

        $filteredFilePath = $filePath . '_tmp';
        $callback = fn (Context $context): string => $context->getPath();
        // Build one alternation of all context paths and quote it once as a
        // whole, instead of nesting individually quoted paths in hand-written quotes.
        $contextPathsPattern = implode('/|/', array_map($callback, $this->contextsByPath));
        $output = null;
        $returnValue = 0;
        exec(escapeshellarg(self::EGREP_PATH) . ' -i ' . escapeshellarg($contextPathsPattern) . ' ' . escapeshellarg($filePath) . ' > ' . escapeshellarg($filteredFilePath), $output, $returnValue);
        // egrep returns 1 when nothing matched; only values above 1 are real errors.
        if ($returnValue > 1) {
            fwrite(STDERR, 'Error: the execution of ' . self::EGREP_PATH . ' is not possible.' . PHP_EOL);
            exit(14);
        }
        clearstatcache();
        // It is the filtered result, not the input file, that tells whether
        // any entry of this installation has been found.
        if (filesize($filteredFilePath) == 0) {
            fwrite(STDERR, 'Error: No entries found related to this installation.' . PHP_EOL);
            exit(15);
        }
        return $filteredFilePath;
    }

    /**
     * Split access log file by day. The new, daily files will be named to apache_usage_events_YYYYMMDD.log
     *
     * The first and the last parsable date of the file define the range of
     * days; one file is created per calendar day of that range, both the
     * first and the last day included, by letting egrep select the lines
     * that contain that day. If a converted apache file for a day already
     * exists in one of the usageStats/ folders, the number of existing files
     * is added to the new file name and a warning is written to STDERR.
     *
     * Exits with code 16 when no date can be read from the file and with
     * code 17 when egrep fails.
     *
     * @return array List of daily access log files (file names, located in the temporary folder).
     */
    public function splitFileByDay(string $filePath): array
    {
        // Get the first and the last date in the log file
        $firstDate = $lastDate = null;
        $splFileObject = new SplFileObject($filePath, 'r');
        while (!$splFileObject->eof()) {
            $line = $splFileObject->fgets();
            if (preg_match(self::PARSEREGEX, $line, $m)) {
                // Prefer the documented named group; fall back to its position in the default regex.
                $parsedDate = DateTime::createFromFormat(self::PHP_DATETIME_FORMAT, $m['date'] ?? ($m[2] ?? ''));
                if ($parsedDate !== false) {
                    $firstDate = $parsedDate;
                    break;
                }
            }
        }
        $splFileObject->seek(PHP_INT_MAX);
        $lastLineNo = $splFileObject->key() + 1;
        do {
            $splFileObject->seek($lastLineNo);
            $line = $splFileObject->current();
            if (is_string($line) && preg_match(self::PARSEREGEX, $line, $m)) {
                $parsedDate = DateTime::createFromFormat(self::PHP_DATETIME_FORMAT, $m['date'] ?? ($m[2] ?? ''));
                if ($parsedDate !== false) {
                    $lastDate = $parsedDate;
                    break;
                }
            }
            $lastLineNo = $splFileObject->key() - 1;
            // Line 0 has to be examined too: it is the last entry of a one-line file.
        } while ($lastLineNo >= 0);
        //explicitly assign null, so that the file can be deleted
        $splFileObject = null;

        if (is_null($firstDate) || is_null($lastDate)) {
            fwrite(STDERR, 'Error: First or last date not found.' . PHP_EOL);
            exit(16);
        }

        // Walk over all calendar days between the first and the last date,
        // including the last date. The days are compared as YYYYMMDD text so
        // that neither the time of day nor the timezone offset of the two
        // log entries can drop (or add) a day.
        $lastDay = $lastDate->format('Ymd');
        $currentDate = (clone $firstDate)->setTime(0, 0, 0);

        $dailyFiles = [];
        while (strcmp($currentDate->format('Ymd'), $lastDay) <= 0) {
            $day = $currentDate->format('Ymd');
            // Check if a converted apache file with the same day already exists in any of usageStats/ folders.
            // glob() returns false on error, treat that as "nothing found".
            $existingApacheUsageEventsFiles = glob(PKPStatisticsHelper::getUsageStatsDirPath() . '/*/apache_usage_events_' . $day . '*') ?: [];
            $existingApacheUsageEventsFilesCount = count($existingApacheUsageEventsFiles);
            $countPartOfFileName = '';
            if ($existingApacheUsageEventsFilesCount) {
                $countPartOfFileName = "_{$existingApacheUsageEventsFilesCount}_";
                fwrite(STDERR, "Warning: One or more files apache_usage_events_{$day}.log already exist. You will need to clean or merge them into one before reprocessing the statistics." . PHP_EOL);
            }
            $dailyFileName = 'apache_usage_events_' . $day . $countPartOfFileName . '.log';
            $dayFilePath = $this->getLogFileDir() . '/' . $dailyFileName;
            $output = null;
            $returnValue = 0;
            exec(escapeshellarg(self::EGREP_PATH) . ' -i ' . escapeshellarg(preg_quote($currentDate->format(self::PHP_DATE_FORMAT))) . ' ' . escapeshellarg($filePath) . ' > ' . escapeshellarg($dayFilePath), $output, $returnValue);
            // egrep returns 1 when nothing matched; only values above 1 are real errors.
            if ($returnValue > 1) {
                fwrite(STDERR, 'Error: Could not split file by day.' . PHP_EOL);
                exit(17);
            }
            $dailyFiles[] = $dailyFileName;
            echo "File {$dayFilePath} created.\n";

            $currentDate->modify('+1 day');
        }

        return $dailyFiles;
    }

    /**
     * Copy one daily file from the folder usageStats/tmp/ into the archive
     * folder of the usage stats directory. Exits with code 18 when the copy fails.
     *
     * @param string $fileName Name of the file inside the temporary folder.
     */
    public function archive(string $fileName): void
    {
        $sourcePath = $this->getLogFileDir() . '/' . $fileName;
        $targetPath = implode('/', [
            StatisticsHelper::getUsageStatsDirPath(),
            FileLoader::FILE_LOADER_PATH_ARCHIVE,
            $fileName,
        ]);

        $copied = (new FileManager())->copyFile($sourcePath, $targetPath);
        if (!$copied) {
            fwrite(STDERR, "Error: Could not copy file from {$sourcePath} to {$targetPath}." . PHP_EOL);
            exit(18);
        }

        echo "File {$sourcePath} successfully archived to {$targetPath}.\n";
    }

    /**
     * Get the expected "page/operation" strings of the running application,
     * grouped by the assoc type constant of the object they give access to.
     *
     * The context assoc type always accepts 'index/index' plus one
     * application specific index operation.
     *
     * @throws Exception If the application name is none of ojs2, omp, ops.
     */
    protected function getExpectedPageAndOp(): array
    {
        $contextAssocType = Application::getContextAssocType();
        $appName = Application::get()->getName();

        switch ($appName) {
            case 'ojs2':
                $contextIndexOp = 'index';
                $objectOps = [
                    Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER => ['article/downloadSuppFile'],
                    Application::ASSOC_TYPE_SUBMISSION_FILE => ['article/download', 'article/viewFile'],
                    Application::ASSOC_TYPE_SUBMISSION => ['article/view', 'article/viewArticle'],
                    Application::ASSOC_TYPE_ISSUE => ['issue/view'],
                    Application::ASSOC_TYPE_ISSUE_GALLEY => ['issue/download', 'issue/viewFile'],
                ];
                break;
            case 'omp':
                $contextIndexOp = 'catalog/index';
                $objectOps = [
                    Application::ASSOC_TYPE_SUBMISSION_FILE => ['catalog/download'],
                    Application::ASSOC_TYPE_MONOGRAPH => ['catalog/book'],
                    Application::ASSOC_TYPE_SERIES => ['catalog/series'],
                ];
                break;
            case 'ops':
                $contextIndexOp = 'index';
                $objectOps = [
                    Application::ASSOC_TYPE_SUBMISSION_FILE => ['preprint/download'],
                    Application::ASSOC_TYPE_SUBMISSION => ['preprint/view'],
                ];
                break;
            default:
                throw new Exception('Unrecognized application name.');
        }

        // The context entry comes first and wins over an equal key of the application list.
        $expected = [$contextAssocType => ['index/index']] + $objectOps;
        $expected[$contextAssocType][] = $contextIndexOp;

        return $expected;
    }

    /**
     * Fill the assoc type and the object IDs of a new log entry by delegating
     * to the handler of the running application (OJS, OMP or OPS).
     *
     * @param array $newEntry The entry to complete, passed on by reference.
     *
     * @throws Exception If the application name is none of ojs2, omp, ops.
     */
    protected function setAssoc(int $assocType, string $op, array $args, array &$newEntry): void
    {
        $appName = Application::get()->getName();

        if ($appName == 'ojs2') {
            $this->setOJSAssoc($assocType, $args, $newEntry);
            return;
        }
        if ($appName == 'omp') {
            $this->setOMPAssoc($assocType, $args, $newEntry);
            return;
        }
        if ($appName == 'ops') {
            $this->setOPSAssoc($assocType, $args, $newEntry);
            return;
        }

        throw new Exception('Unrecognized application name!');
    }

    /**
     * Set assoc type and IDs from the passed page, operation and
     * arguments specific to OJS.
     */
    protected function setOJSAssoc(int $assocType, array $args, array &$newEntry): void
    {
        switch ($assocType) {
            case Application::getContextAssocType():
                // $newEntry['contextId'] has already been set
                $newEntry['assocType'] = $assocType;
                break;

            case Application::ASSOC_TYPE_SUBMISSION:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                    break;
                }

                $submission = Repo::submission()->getByBestId($args[0], $newEntry['contextId']);
                if (!$submission) {
                    fwrite(STDERR, "Submission with the URL path or ID {$args[0]} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $submissionId = $submission->getId();

                // If it is an older submission version, the arguments must be:
                // $submissionId/version/$publicationId.
                // Consider also releases 2.x where log files can contain URL
                // view/$submissionId/$representationId i.e. without $submissionFileId argument
                // for other and remote galleys.
                if (in_array('version', $args)) {
                    if ($args[1] !== 'version' || !isset($args[2])) {
                        fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>.' . PHP_EOL);
                        break;
                    }
                    $publicationId = (int) $args[2];
                    if (!Repo::publication()->exists($publicationId, $submissionId)) {
                        fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                        break;
                    }
                } elseif (count($args) == 2) {
                    // Consider usage stats log files from releases 2.x:
                    // The URL article/view/{$articleId}/{$galleyId} was used for assoc type galley.
                    // Only Other galleys will be considered here (s. file description above).
                    // Those should then be assoc type = submission file.
                    $representationUrlPath = $args[1];
                    $galley = $representationId = null;
                    if (ctype_digit((string) $representationUrlPath)) {
                        // assume it is ID and not the URL path
                        $representationId = (int) $representationUrlPath;
                        $galley = Repo::galley()->get($representationId);
                    } else {
                        // We need to get the publication in order to be able to get galley by URL path
                        // We cannot assume that this is the current publication,
                        // because the log entry can be long time ago, and
                        // since then there could be new submission versions created,
                        // so take the first publication and galley found with the given representationUrlPath.
                        // (Different publications can contain the same galley URL path.)
                        // It is not accurate but only possible.
                        $publications = $submission->getData('publications');
                        foreach ($publications as $publication) {
                            foreach ($publication->getData('galleys') as $publicationGalley) {
                                if ($publicationGalley->getBestGalleyId() == $representationUrlPath) {
                                    $galley = $publicationGalley;
                                    $representationId = $publicationGalley->getId();
                                    break 2;
                                }
                            }
                        }
                    }
                    if (!isset($galley)) {
                        fwrite(STDERR, "Galley with the URL path {$representationUrlPath} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                        break;
                    }
                    $submissionFileId = $galley->getData('submissionFileId');
                    if (!isset($submissionFileId)) {
                        break;
                    }
                    $submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
                    if (!isset($submissionFile)) {
                        fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                        break;
                    }
                    $fileType = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
                    if ($fileType == StatisticsHelper::STATISTICS_FILE_TYPE_PDF || $fileType == StatisticsHelper::STATISTICS_FILE_TYPE_HTML) {
                        // Do not consider PDF and HTML file, the download URL will follow
                        break;
                    }
                    $newEntry['assocType'] = Application::ASSOC_TYPE_SUBMISSION_FILE;
                    $newEntry['submissionId'] = $submissionId;
                    $newEntry['representationId'] = $representationId;
                    $newEntry['submissionFileId'] = $submissionFileId;
                    $newEntry['fileType'] = $fileType;
                    break;
                }
                $newEntry['submissionId'] = $submissionId;
                $newEntry['assocType'] = $assocType;
                break;

            case Application::ASSOC_TYPE_SUBMISSION_FILE:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                    break;
                }
                if (!isset($args[1])) {
                    fwrite(STDERR, 'Missing galley ID URL parameter.' . PHP_EOL);
                    break;
                }

                $submission = Repo::submission()->getByBestId($args[0], $newEntry['contextId']);
                if (!$submission) {
                    fwrite(STDERR, "Submission with the URL path or ID {$args[0]} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $submissionId = $submission->getId();

                // If it is an older submission version, the arguments must be:
                // $submissionId/version/$publicationId/$representationId/$submissionFileId.
                // Consider also this issue: https://github.com/pkp/pkp-lib/issues/6573
                // where apache log files can contain URL download/$submissionId/$representationId,
                // i.e. without $submissionFileId argument.
                // Also the URLs from releases 2.x will not have submissionFileId.
                $publicationId = $submissionFileId = null; // do not necessarily exist
                if (in_array('version', $args)) {
                    if ($args[1] !== 'version' || !isset($args[2]) || !isset($args[3])) {
                        // if version is there, there must be $publicationId and $representationId arguments
                        fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>/<galleyId>/<fileId>.' . PHP_EOL);
                        break;
                    }
                    $publicationId = (int) $args[2];
                    $representationUrlPath = $args[3];
                    if (isset($args[4])) {
                        $submissionFileId = (int) $args[4];
                    }
                } else {
                    $representationUrlPath = $args[1];
                    if (isset($args[2])) {
                        $submissionFileId = (int) $args[2];
                    }
                }

                // Find the galley and representation ID
                $representationId = $galley = null;
                if (ctype_digit((string) $representationUrlPath)) {
                    // assume it is ID and not the URL path
                    $representationId = (int) $representationUrlPath;
                    $galley = Repo::galley()->get($representationId);
                    if (!$galley) {
                        fwrite(STDERR, "Galley with the ID {$representationUrlPath} does not exist." . PHP_EOL);
                        break;
                    }
                } else {
                    // We need to get the publication in order to be able to get galley by URL path
                    $publications = $submission->getData('publications');
                    if (isset($publicationId)) {
                        $publication = $publications->first(function ($value, $key) use ($publicationId) {
                            return $value->getId() == $publicationId;
                        });
                        if (!$publication) {
                            fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                            break;
                        }
                        $galley = Repo::galley()->getByUrlPath($representationUrlPath, $publication);
                        if (!$galley) {
                            fwrite(STDERR, "Galley with the URL path {$representationUrlPath} does not exist in the publication (submission version) with the ID {$publicationId}." . PHP_EOL);
                            break;
                        }
                        $representationId = $galley->getId();
                    } else {
                        // We cannot assume that this is the current publication,
                        // because the log entry can be long time ago, and
                        // since then there could be new submission versions created,
                        // so take the first publication and galley found with the given representationUrlPath.
                        // (Different publications can contain the same galley URL path.)
                        $possibleGalleys = [];
                        foreach ($publications as $publication) {
                            foreach ($publication->getData('galleys') as $publicationGalley) {
                                if ($publicationGalley->getBestGalleyId() == $representationUrlPath) {
                                    $possibleGalleys[] = $publicationGalley;
                                    if (isset($submissionFileId) && $publicationGalley->getData('submissionFileId') == $submissionFileId) {
                                        $galley = $publicationGalley;
                                        $representationId = $publicationGalley->getId();
                                        break 2;
                                    }
                                }
                            }
                        }
                        if (empty($possibleGalleys)) {
                            fwrite(STDERR, "Galley with the URL path {$representationUrlPath} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                            break;
                        }
                        // if no matching galley has been found yet, take the first possible
                        if (!isset($representationId)) {
                            $galley = $possibleGalleys[0];
                            $representationId = $galley->getId();
                        }
                    }
                }
                if (!$submissionFileId) {
                    $submissionFileId = $galley->getData('submissionFileId');
                }
                $submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
                if (!$submissionFile) {
                    fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                    break;
                }
                if ($galley->getData('submissionFileId') != $submissionFileId) {
                    // This check is e.g. when representation ID (and not URL path) and submissionFileId are given as arguments
                    fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not belong to the galley with the ID {$representationId}." . PHP_EOL);
                    break;
                }

                // is this a full text or supp file
                $genreDao = DAORegistry::getDAO('GenreDAO');
                $genre = $genreDao->getById($submissionFile->getData('genreId'));
                if ($genre->getCategory() != Genre::GENRE_CATEGORY_DOCUMENT || $genre->getSupplementary() || $genre->getDependent()) {
                    $newEntry['assocType'] = Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER;
                } else {
                    $newEntry['assocType'] = $assocType;
                }
                $newEntry['submissionId'] = $submissionId;
                $newEntry['representationId'] = $representationId;
                $newEntry['submissionFileId'] = $submissionFileId;
                $newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
                break;

            case Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER:
                // This is the URL article/downloadSuppFile/articleId/suppFileId from a 2.x log file
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                    break;
                }
                if (!isset($args[1])) {
                    fwrite(STDERR, 'Missing supp file ID URL parameter.' . PHP_EOL);
                    break;
                }

                $submission = Repo::submission()->getByBestId($args[0], $newEntry['contextId']);
                if (!$submission) {
                    fwrite(STDERR, "Submission with the URL path or ID {$args[0]} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $submissionId = $submission->getId();

                $galley = $submissionFile = null;
                $publications = $submission->getData('publications');
                foreach ($publications as $publication) {
                    foreach ($publication->getData('galleys') as $possibleGalley) {
                        $possibleSubmissionFileId = $possibleGalley->getData('submissionFileId');
                        if ($possibleSubmissionFileId) { // it is not a remote supp file
                            $possibleSubmissionFile = Repo::submissionFile()->get($possibleSubmissionFileId, $submissionId);
                            if ($possibleSubmissionFile) {
                                if (ctype_digit((string) $args[1])) { // supp file ID
                                    if ($possibleSubmissionFile->getData('old-supp-id') == $args[1]) {
                                        // Galley and file found
                                        $galley = $possibleGalley;
                                        $submissionFile = $possibleSubmissionFile;
                                        break 2;
                                    }
                                } else { // supp file URL path
                                    if ($possibleGalley->getData('urlPath') == $args[1]) {
                                        // Galley and file found
                                        $galley = $possibleGalley;
                                        $submissionFile = $possibleSubmissionFile;
                                        break 2;
                                    }
                                }
                            }
                        }
                    }
                }
                if ($galley && $submissionFile) {
                    $newEntry['assocType'] = $assocType;
                    $newEntry['submissionId'] = $submissionId;
                    $newEntry['representationId'] = $galley->getId();
                    $newEntry['submissionFileId'] = $submissionFile->getId();
                    $newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
                } else {
                    fwrite(STDERR, 'Supp file could not be found.' . PHP_EOL);
                }
                break;

            case Application::ASSOC_TYPE_ISSUE:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing issue ID URL parameter.' . PHP_EOL);
                    break;
                }
                // Consider issue https://github.com/pkp/pkp-lib/issues/6611
                // where apache log files contain both URLs for issue galley download:
                // issue/view/issueId/galleyId (that should not be considered here), as well as
                // issue/download/issueId/galleyId (that is considered below)
                if (count($args) != 1) {
                    break;
                }
                $issue = Repo::issue()->getByBestId($args[0], $newEntry['contextId']);
                if (!$issue) {
                    fwrite(STDERR, "Issue with the URL path or ID {$args[0]} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $issueId = $issue->getId();
                $newEntry['issueId'] = $issueId;
                $newEntry['assocType'] = $assocType;
                break;

            case Application::ASSOC_TYPE_ISSUE_GALLEY:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing issue ID URL parameter.' . PHP_EOL);
                    break;
                }
                if (!isset($args[1])) {
                    fwrite(STDERR, 'Missing issue galley ID URL parameter.' . PHP_EOL);
                    break;
                }

                $issue = Repo::issue()->getByBestId($args[0], $newEntry['contextId']);
                if (!$issue) {
                    fwrite(STDERR, "Issue with the URL path or ID {$args[0]} does not exist in the journal with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $issueId = $issue->getId();
                $issueGalleyDao = DAORegistry::getDAO('IssueGalleyDAO');
                $issueGalley = $issueGalleyDao->getByBestId($args[1], $issueId);
                if (!$issueGalley) {
                    fwrite(STDERR, "Issue galley with the URL path or ID {$args[1]} does not exist in the issue with the ID {$issueId}." . PHP_EOL);
                    break;
                }
                $newEntry['issueId'] = $issueId;
                $newEntry['issueGalleyId'] = $issueGalley->getId();
                $newEntry['assocType'] = $assocType;
                break;
        }
    }

    /**
     * Set assoc type and IDs from the passed page, operation and
     * arguments specific to OMP.
     *
     * Depending on the assoc type, the URL arguments are validated and resolved
     * to a submission, chapter, publication format, submission file or series.
     * Whenever a required argument is missing or an object cannot be found,
     * a message is written to STDERR and the case is left without adding
     * the assoc specific keys to $newEntry.
     *
     * @param int $assocType Assoc type the URL was recognized as
     * @param array $args URL arguments (the path parts that follow page/operation)
     * @param array $newEntry The log entry to complete, passed by reference;
     *  $newEntry['contextId'] is expected to be set already
     */
    protected function setOMPAssoc(int $assocType, array $args, array &$newEntry): void
    {
        switch ($assocType) {
            case Application::getContextAssocType():
                // $newEntry['contextId'] has already been set
                $newEntry['assocType'] = $assocType;
                break;

            case Application::ASSOC_TYPE_SUBMISSION:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                    break;
                }

                $submission = Repo::submission()->getByBestId($args[0], $newEntry['contextId']);
                if (!$submission) {
                    fwrite(STDERR, "Submission with the URL path or ID {$args[0]} does not exist in the press with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $submissionId = $submission->getId();

                // If it is an older submission version, the arguments must be:
                // $submissionId/version/$publicationId.
                $publicationId = null;
                if (in_array('version', $args)) {
                    // 'version' can be found anywhere in $args, so $args[1] is read defensively
                    if (($args[1] ?? null) !== 'version' || !isset($args[2])) {
                        fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>.' . PHP_EOL);
                        break;
                    }
                    $publicationId = (int) $args[2];
                    if (!Repo::publication()->exists($publicationId, $submissionId)) {
                        fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                        break;
                    }
                }

                // Is it a chapter landing page
                $chapter = null;
                if (in_array('chapter', $args)) {
                    if (isset($publicationId)) {
                        // The URL is $submissionId/version/$publicationId/chapter/$chapterId
                        // ($args[3] does not necessarily exist, so it is read defensively)
                        if (($args[3] ?? null) !== 'chapter' || !isset($args[4])) {
                            fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>/chapter/<chapterId>.' . PHP_EOL);
                            break;
                        }
                        $chapterId = (int) $args[4];
                    } else {
                        // The URL is $submissionId/chapter/$chapterId
                        if (($args[1] ?? null) !== 'chapter' || !isset($args[2])) {
                            fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/chapter/<chapterId>.' . PHP_EOL);
                            break;
                        }
                        $chapterId = (int) $args[2];
                    }
                    $chapterDao = DAORegistry::getDAO('ChapterDAO'); /** @var ChapterDAO $chapterDao */
                    $chapter = $chapterDao->getChapter($chapterId);
                    if (!$chapter) {
                        fwrite(STDERR, "Chapter with the ID {$chapterId} does not exist." . PHP_EOL);
                        break;
                    }
                }

                $newEntry['submissionId'] = $submissionId;
                // A chapter landing page is counted as a chapter, everything else as the submission
                $newEntry['assocType'] = isset($chapter) ? Application::ASSOC_TYPE_CHAPTER : $assocType;
                // Same key as used for the submission files below ('chapterId')
                $newEntry['chapterId'] = isset($chapter) ? $chapter->getId() : null;
                break;

            case Application::ASSOC_TYPE_SUBMISSION_FILE:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                    break;
                }
                if (!isset($args[1])) {
                    fwrite(STDERR, 'Missing publication format ID URL parameter.' . PHP_EOL);
                    break;
                }
                if (!isset($args[2])) {
                    fwrite(STDERR, 'Missing file or publication ID URL parameter.' . PHP_EOL);
                    break;
                }

                $submission = Repo::submission()->getByBestId($args[0], $newEntry['contextId']);
                if (!$submission) {
                    fwrite(STDERR, "Submission with the URL path or ID {$args[0]} does not exist in the press with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $submissionId = $submission->getId();

                // If it is an older submission version, the arguments must be:
                // $submissionId/version/$publicationId/$representationId/$submissionFileId.
                $publicationId = null;
                if (in_array('version', $args)) {
                    // The file ID ($args[4]) is read below, so it must exist as well
                    if ($args[1] !== 'version' || !isset($args[2]) || !isset($args[3]) || !isset($args[4])) {
                        fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>/<publicationFormatId>/<fileId>.' . PHP_EOL);
                        break;
                    }
                    $publicationId = (int) $args[2];
                    $representationUrlPath = $args[3];
                    $submissionFileId = (int) $args[4];
                } else {
                    $representationUrlPath = $args[1];
                    $submissionFileId = (int) $args[2];
                }

                $submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
                if (!$submissionFile) {
                    fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                    break;
                }
                if ($submissionFile->getData('assocType') != Application::ASSOC_TYPE_PUBLICATION_FORMAT) {
                    fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not belong to a publication format." . PHP_EOL);
                    break;
                }
                // The publication format the file is assigned to is the reference
                // the URL argument is compared against below
                $representationId = $submissionFile->getData('assocId');
                $publicationFormatDao = DAORegistry::getDAO('PublicationFormatDAO'); /** @var PublicationFormatDAO $publicationFormatDao */
                if (ctype_digit((string) $representationUrlPath)) {
                    // assume it is ID and not the URL path
                    if ($representationUrlPath != $representationId) {
                        fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not belong to the publication format with ID {$representationUrlPath}." . PHP_EOL);
                        break;
                    }
                    $publicationFormat = $publicationFormatDao->getById($representationId, $publicationId);
                    if (!$publicationFormat) {
                        fwrite(STDERR, "Publication format with the ID {$representationUrlPath} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                        break;
                    }
                } else {
                    // We need to get the publication in order to be able to get publication format by URL path
                    $publications = $submission->getData('publications');
                    if (isset($publicationId)) {
                        $publication = $publications->first(function ($value, $key) use ($publicationId) {
                            return $value->getId() == $publicationId;
                        });
                        if (!$publication) {
                            fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                            break;
                        }
                        $publicationFormat = $publicationFormatDao->getByBestId($representationUrlPath, $publication->getId());
                        if (!$publicationFormat) {
                            fwrite(STDERR, "Publication format with the URL path {$representationUrlPath} does not exist in the publication (submission version) with the ID {$publicationId}." . PHP_EOL);
                            break;
                        }
                        if ($representationId != $publicationFormat->getId()) {
                            fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the publication (submission version) with the ID {$publicationId}." . PHP_EOL);
                            break;
                        }
                    } else {
                        // We cannot assume that this is the current publication,
                        // because the log entry can be long time ago, and
                        // since then there could be new submission versions created,
                        // so take the first publication found with
                        // publication format with the given representationUrlPath
                        // that contains the given submission file.
                        // (Different publications can contain the same publication format URL path.)
                        $publicationFormat = null;
                        foreach ($publications as $publication) {
                            foreach ($publication->getData('publicationFormats') as $possiblePublicationFormat) {
                                if ($possiblePublicationFormat->getBestId() == $representationUrlPath) {
                                    if ($representationId == $possiblePublicationFormat->getId()) {
                                        $publicationFormat = $possiblePublicationFormat;
                                        break 2;
                                    }
                                }
                            }
                        }
                        if (!$publicationFormat) {
                            fwrite(STDERR, "Publication format with the URL path {$representationUrlPath} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                            break;
                        }
                    }
                }

                // is this a full text or supp file
                // NOTE(review): assumes the genre of the submission file can always be loaded -- confirm
                $genreDao = DAORegistry::getDAO('GenreDAO');
                $genre = $genreDao->getById($submissionFile->getData('genreId'));
                if ($genre->getCategory() != Genre::GENRE_CATEGORY_DOCUMENT || $genre->getSupplementary() || $genre->getDependent()) {
                    $newEntry['assocType'] = Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER;
                } else {
                    $newEntry['assocType'] = $assocType;
                }
                $newEntry['submissionId'] = $submissionId;
                $newEntry['representationId'] = $representationId;
                $newEntry['submissionFileId'] = $submissionFileId;
                $newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
                $newEntry['chapterId'] = $submissionFile->getData('chapterId');
                break;

            case Application::ASSOC_TYPE_SERIES:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing series path URL parameter.' . PHP_EOL);
                    break;
                }
                $seriesPath = $args[0];
                $series = Repo::section()->getByPath($seriesPath, $newEntry['contextId']);
                if (!$series) {
                    fwrite(STDERR, "Series with the path {$seriesPath} does not exist in the press with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $newEntry['seriesId'] = $series->getId();
                $newEntry['assocType'] = $assocType;
                break;
        }
    }

    /**
     * Set assoc type and IDs from the passed page, operation and
     * arguments specific to OPS.
     *
     * Depending on the assoc type, the URL arguments are validated and resolved
     * to a submission, galley and submission file.
     * Whenever a required argument is missing or an object cannot be found,
     * a message is written to STDERR and the case is left without adding
     * the assoc specific keys to $newEntry.
     *
     * @param int $assocType Assoc type the URL was recognized as
     * @param array $args URL arguments (the path parts that follow page/operation)
     * @param array $newEntry The log entry to complete, passed by reference;
     *  $newEntry['contextId'] is expected to be set already
     */
    protected function setOPSAssoc(int $assocType, array $args, array &$newEntry): void
    {
        switch ($assocType) {
            case Application::getContextAssocType():
                // $newEntry['contextId'] has already been set
                $newEntry['assocType'] = $assocType;
                break;

            case Application::ASSOC_TYPE_SUBMISSION:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                    break;
                }

                $submission = Repo::submission()->getByBestId($args[0], $newEntry['contextId']);
                if (!$submission) {
                    fwrite(STDERR, "Submission with the URL path or ID {$args[0]} does not exist in the server with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $submissionId = $submission->getId();

                // If it is an older submission version, the arguments must be:
                // $submissionId/version/$publicationId.
                if (in_array('version', $args)) {
                    // 'version' can be found anywhere in $args, so $args[1] is read defensively
                    if (($args[1] ?? null) !== 'version' || !isset($args[2])) {
                        fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>.' . PHP_EOL);
                        break;
                    }
                    $publicationId = (int) $args[2];
                    if (!Repo::publication()->exists($publicationId, $submissionId)) {
                        fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                        break;
                    }
                }
                $newEntry['submissionId'] = $submissionId;
                $newEntry['assocType'] = $assocType;
                break;

            case Application::ASSOC_TYPE_SUBMISSION_FILE:
                if (!isset($args[0])) {
                    fwrite(STDERR, 'Missing submission ID URL parameter.' . PHP_EOL);
                    break;
                }
                if (!isset($args[1])) {
                    fwrite(STDERR, 'Missing galley ID URL parameter.' . PHP_EOL);
                    break;
                }
                if (!isset($args[2])) {
                    fwrite(STDERR, 'Missing file or publication ID URL parameter.' . PHP_EOL);
                    break;
                }

                $submission = Repo::submission()->getByBestId($args[0], $newEntry['contextId']);
                if (!$submission) {
                    fwrite(STDERR, "Submission with the URL path or ID {$args[0]} does not exist in the server with the ID {$newEntry['contextId']}." . PHP_EOL);
                    break;
                }
                $submissionId = $submission->getId();

                // If it is an older submission version, the arguments must be:
                // $submissionId/version/$publicationId/$representationId/$submissionFileId.
                $publicationId = null;
                if (in_array('version', $args)) {
                    // The file ID ($args[4]) is read below, so it must exist as well
                    if ($args[1] !== 'version' || !isset($args[2]) || !isset($args[3]) || !isset($args[4])) {
                        fwrite(STDERR, 'The following arguments are expected and not found: <submissionId>/version/<publicationId>/<galleyId>/<fileId>.' . PHP_EOL);
                        break;
                    }
                    $publicationId = (int) $args[2];
                    $representationUrlPath = $args[3];
                    $submissionFileId = (int) $args[4];
                } else {
                    $representationUrlPath = $args[1];
                    $submissionFileId = (int) $args[2];
                }

                $submissionFile = Repo::submissionFile()->get($submissionFileId, $submissionId);
                if (!$submissionFile) {
                    fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                    break;
                }

                // Find the galley and representation ID
                $representationId = $galley = null;
                if (ctype_digit((string) $representationUrlPath)) {
                    // assume it is ID and not the URL path
                    $representationId = (int) $representationUrlPath;
                    $galley = Repo::galley()->get($representationId);
                    if (!$galley) {
                        fwrite(STDERR, "Galley with the ID {$representationUrlPath} does not exist." . PHP_EOL);
                        break;
                    }
                } else {
                    // We need to get the publication in order to be able to get galley by URL path
                    $publications = $submission->getData('publications');
                    if (isset($publicationId)) {
                        $publication = $publications->first(function ($value, $key) use ($publicationId) {
                            return $value->getId() == $publicationId;
                        });
                        if (!$publication) {
                            fwrite(STDERR, "Publication (submission version) with the ID {$publicationId} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                            break;
                        }
                        $galley = Repo::galley()->getByUrlPath($representationUrlPath, $publication);
                        if (!$galley) {
                            fwrite(STDERR, "Galley with the URL path {$representationUrlPath} does not exist in the publication (submission version) with the ID {$publicationId}." . PHP_EOL);
                            break;
                        }
                        $representationId = $galley->getId();
                    } else {
                        // We cannot assume that this is the current publication,
                        // because the log entry can be long time ago, and
                        // since then there could be new submission versions created,
                        // so take the first publication found with
                        // galley with the given representationUrlPath
                        // that contain the given submission file.
                        // (Different publications can contain the same galley URL path.)
                        foreach ($publications as $publication) {
                            foreach ($publication->getData('galleys') as $publicationGalley) {
                                if ($publicationGalley->getBestGalleyId() == $representationUrlPath) {
                                    if ($publicationGalley->getData('submissionFileId') == $submissionFileId) {
                                        $galley = $publicationGalley;
                                        $representationId = $publicationGalley->getId();
                                        break 2;
                                    }
                                }
                            }
                        }
                        if (!$representationId) {
                            fwrite(STDERR, "Galley with the URL path {$representationUrlPath} does not exist in the submission with the ID {$submissionId}." . PHP_EOL);
                            break;
                        }
                    }
                }
                if ($galley->getData('submissionFileId') != $submissionFileId) {
                    // This check is e.g. when representation ID (and not URL path) and submissionFileId are given as arguments
                    fwrite(STDERR, "Submission file with the ID {$submissionFileId} does not belong to the galley with the ID {$representationId}." . PHP_EOL);
                    break;
                }

                // is this a full text or supp file
                // NOTE(review): assumes the genre of the submission file can always be loaded -- confirm
                $genreDao = DAORegistry::getDAO('GenreDAO');
                $genre = $genreDao->getById($submissionFile->getData('genreId'));
                if ($genre->getCategory() != Genre::GENRE_CATEGORY_DOCUMENT || $genre->getSupplementary() || $genre->getDependent()) {
                    $newEntry['assocType'] = Application::ASSOC_TYPE_SUBMISSION_FILE_COUNTER_OTHER;
                } else {
                    $newEntry['assocType'] = $assocType;
                }
                $newEntry['submissionId'] = $submissionId;
                $newEntry['representationId'] = $representationId;
                $newEntry['submissionFileId'] = $submissionFileId;
                $newEntry['fileType'] = StatisticsHelper::getDocumentType($submissionFile->getData('mimetype'));
                break;
        }
    }
}

// Script entry point: create the tool with the command line arguments
// (an empty array is used when $argv is not defined) and run it.
$tool = new ConvertApacheAccessLogFile($argv ?? []);
$tool->execute();

3g86 2022