#
# Copyright 2004-2006 Zuza Software Foundation
#
# This file is part of translate.
#
# translate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# translate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#

"""
Convert HTML files to Gettext PO localization files.

See: http://docs.translatehouse.org/projects/translate-toolkit/en/latest/commands/html2po.html
for examples and usage instructions.
"""

from translate.convert import convert
from translate.storage import html, po


class html2po(convert.DocpathMerger):
    """Converter that turns the translatable units of an html file into PO units."""

    def convertfile(
        self,
        inputfile,
        filename,
        duplicatestyle="msgctxt",
        keepcomments=False,
        templatefile=None,
    ):
        """
        Build and return a new PO store for a single html file.

        :param inputfile: the html input handed on to the extraction or merge step.
        :param filename: accepted for interface compatibility; not used here.
        :param duplicatestyle: passed to ``removeduplicates`` on the new store.
        :param keepcomments: when true, html notes are copied as developer notes.
        :param templatefile: when not None, the result is produced by
            :meth:`mergefile` instead of plain extraction.
        :return: the populated ``po.pofile`` with duplicates already handled.
        """
        postore = po.pofile()
        if templatefile is not None:
            self.mergefile(inputfile, templatefile, postore, keepcomments)
        else:
            self.convertfile_inner(inputfile, postore, keepcomments)
        postore.removeduplicates(duplicatestyle)
        return postore

    @staticmethod
    def _apply_context_and_notes(htmlunit, pounit, keepcomments) -> None:
        """Copy the (non-empty) context and, if requested, the notes of htmlunit to pounit."""
        unitcontext = htmlunit.getcontext()
        if unitcontext:
            pounit.setcontext(unitcontext)
        if keepcomments:
            pounit.addnote(htmlunit.getnotes(), "developer")

    @staticmethod
    def convertfile_inner(inputfile, outputstore, keepcomments) -> None:
        """
        Parse inputfile as html and append one unit per html unit to outputstore.

        Every new unit receives the source text and locations of its html
        unit; context and notes are copied as described in
        ``_apply_context_and_notes``.
        """
        parsed = html.htmlfile(inputfile=inputfile)
        for sourceunit in parsed.units:
            pounit = outputstore.addsourceunit(sourceunit.source)
            pounit.addlocations(sourceunit.getlocations())
            html2po._apply_context_and_notes(sourceunit, pounit, keepcomments)

    def mergefile(self, inputfile, templatefile, outputstore, keepcomments) -> None:
        """
        Merge translation from inputfile with source from templatefile.

        The work is delegated to ``merge_stores_by_docpath``, with both files
        read as ``html.htmlfile`` and header filtering switched off. A
        callback copies context and notes from each template unit onto the
        corresponding unit of outputstore.
        """

        def process_html_unit(templateunit, storeunit):
            """Carry the html specific attributes of templateunit over to storeunit."""
            self._apply_context_and_notes(templateunit, storeunit, keepcomments)

        self.merge_stores_by_docpath(
            inputfile,
            templatefile,
            outputstore,
            html.htmlfile,
            filter_header=False,
            process_unit_callback=process_html_unit,
        )


def converthtml(
    inputfile,
    outputfile,
    templatefile,
    pot=False,
    duplicatestyle="msgctxt",
    keepcomments=False,
) -> int:
    """
    Convert one html file to PO and serialize the result to outputfile.

    :param inputfile: the html input; its ``name`` attribute (or "unknown"
        when it has none) is passed to the converter as the file name.
    :param outputfile: destination the resulting PO store is serialized to.
    :param templatefile: optional template; forwarded to ``html2po.convertfile``.
    :param pot: accepted for interface compatibility; not used here.
    :param duplicatestyle: forwarded to ``html2po.convertfile``.
    :param keepcomments: forwarded to ``html2po.convertfile``.
    :return: always 1.
    """
    postore = html2po().convertfile(
        inputfile,
        getattr(inputfile, "name", "unknown"),
        templatefile=templatefile,
        keepcomments=keepcomments,
        duplicatestyle=duplicatestyle,
    )
    postore.serialize(outputfile)
    return 1


class Html2POOptionParser(convert.ConvertOptionParser):
    """
    Option parser for the html2po command line tool.

    Besides converting each input file to its own output, the parser has a
    single-output-file mode (``--multifile`` style "onefile") in which the
    units of all inputs are gathered in ``self.outputstore`` and written
    once. That attribute only exists after :meth:`recursiveprocess` has
    entered this mode, so its presence is what the overrides below test.
    """

    def __init__(self) -> None:
        """Register the html input formats and the html2po specific options."""
        formats = {
            extension: ("po", self.convert)
            for extension in ("html", "htm", "xhtml", None)
        }
        super().__init__(formats, usetemplates=True, usepots=True, description=__doc__)
        self.add_option(
            "--keepcomments",
            dest="keepcomments",
            default=False,
            action="store_true",
            help="preserve html comments as translation notes in the output",
        )
        self.passthrough.append("keepcomments")
        self.add_duplicates_option()
        self.add_multifile_option()
        self.passthrough.append("pot")

    def _in_onefile_mode(self) -> bool:
        """Return True once recursiveprocess has created the shared output store."""
        return hasattr(self, "outputstore")

    def convert(
        self,
        inputfile,
        outputfile,
        templatefile,
        pot=False,
        duplicatestyle="msgctxt",
        multifilestyle="single",
        keepcomments=False,
    ) -> int:
        """
        Extract translation units from one html file.

        In single-output-file mode the units are added to
        ``self.outputstore`` and nothing is written to outputfile; otherwise
        a fresh PO store is built and serialized to outputfile.

        :return: always 1.
        """
        convertor = html2po()
        if not self._in_onefile_mode():
            outputstore = convertor.convertfile(
                inputfile,
                getattr(inputfile, "name", "unknown"),
                duplicatestyle=duplicatestyle,
                keepcomments=keepcomments,
                templatefile=templatefile,
            )
            outputstore.serialize(outputfile)
        elif templatefile is None:
            convertor.convertfile_inner(inputfile, self.outputstore, keepcomments)
        else:
            convertor.mergefile(inputfile, templatefile, self.outputstore, keepcomments)
        return 1

    def recursiveprocess(self, options) -> None:
        """Recurse through directories and process files. (override)."""
        if options.multifilestyle != "onefile":
            super().recursiveprocess(options)
            return

        # Creating the attribute switches the other overrides into
        # single-output-file mode for the duration of the run.
        self.outputstore = po.pofile()
        super().recursiveprocess(options)
        if self.outputstore.isempty():
            return

        self.outputstore.removeduplicates(options.duplicatestyle)
        # Call the parent implementation directly: our own openoutputfile
        # deliberately returns None while in single-output-file mode.
        outputfile = super().openoutputfile(options, options.output)
        try:
            self.outputstore.serialize(outputfile)
        finally:
            # Close the file even when serialization fails; the handle is
            # left alone when no output path was given.
            if options.output:
                outputfile.close()

    def isrecursive(self, fileoption, filepurpose="input"):
        """Check if fileoption is a recursive file. (override)."""
        if filepurpose == "output" and self._in_onefile_mode():
            return True
        return super().isrecursive(fileoption, filepurpose=filepurpose)

    def checkoutputsubdir(self, options, subdir) -> None:
        """
        Check if subdir under options.output needs to be created,
        creates if necessary. Do nothing if in single-output-file mode. (override).
        """
        if not self._in_onefile_mode():
            super().checkoutputsubdir(options, subdir)

    def openoutputfile(self, options, fulloutputpath):
        """Open the output file, or do nothing if in single-output-file mode. (override)."""
        if self._in_onefile_mode():
            return None
        return super().openoutputfile(options, fulloutputpath)


def main(argv=None) -> None:
    """Create the html2po option parser and run it with argv."""
    Html2POOptionParser().run(argv)


# Run the command line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
