OpenConcerto

Dépôt officiel du code source de l'ERP OpenConcerto
sonarqube

svn://code.openconcerto.org/openconcerto

Rev

Rev 73 | Rev 180 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 * 
 * Copyright 2011 OpenConcerto, by ILM Informatique. All rights reserved.
 * 
 * The contents of this file are subject to the terms of the GNU General Public License Version 3
 * only ("GPL"). You may not use this file except in compliance with the License. You can obtain a
 * copy of the License at http://www.gnu.org/licenses/gpl-3.0.html See the License for the specific
 * language governing permissions and limitations under the License.
 * 
 * When distributing the software, include this License Header Notice in each file.
 */
 
 package org.openconcerto.openoffice.text;

import org.openconcerto.openoffice.ODDocument;
import org.openconcerto.openoffice.ODNodeDesc.Children;
import org.openconcerto.openoffice.ODPackage;
import org.openconcerto.openoffice.OOXML;
import org.openconcerto.openoffice.Style;
import org.openconcerto.openoffice.Style.ResolveResult;
import org.openconcerto.openoffice.StyledNode;
import org.openconcerto.openoffice.XMLFormatVersion;
import org.openconcerto.openoffice.spreadsheet.Cell;
import org.openconcerto.utils.CollectionUtils;
import org.openconcerto.utils.cc.IPredicate;
import org.openconcerto.xml.DescendantIterator;
import org.openconcerto.xml.JDOMUtils;
import org.openconcerto.xml.SimpleXMLPath;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

import org.jdom.Attribute;
import org.jdom.Content;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.Text;

/**
 * A text node that can be created ex nihilo. Ie without a document at first.
 * 
 * @author Sylvain CUAZ
 * 
 * @param <S> type of style.
 */
public abstract class TextNode<S extends TextStyle> extends StyledNode<S, ODDocument> {

    // Matches any run of tab, carriage return, line feed or space characters, each run is replaced
    // by a single space when parsing text, see §6.1.2 White Space Characters of OpenDocument v1.2
    private static final Pattern multiSpacePattern = Pattern.compile("[\t\r\n ]+");
    /** Unicode LINE SEPARATOR (U+2028), stands for a line-break when separators are requested. */
    public static final String LINE_SEPARATOR = "\u2028";
    // the single character of LINE_SEPARATOR, to append it as a char
    private static final char LINE_SEPARATOR_CHAR = LINE_SEPARATOR.charAt(0);
    /** Unicode PARAGRAPH SEPARATOR (U+2029), joins paragraphs when separators are requested. */
    public static final String PARAGRAPH_SEPARATOR = "\u2029";
    /**
     * Used by Microsoft Word as a line separator.
     * 
     * @see <a href="http://support.microsoft.com/kb/59096/en-us">Microsoft</a>
     * @see <a href="http://unicode.org/reports/tr13/tr13-9.html">UNICODE NEWLINE GUIDELINES</a>
     */
    public static final char VERTICAL_TAB_CHAR = '\u000B';

    /**
     * Return the text value of the paragraphs of the passed element, without using Unicode
     * separators : line-breaks and paragraphs are both rendered as <code>'\n'</code>.
     * 
     * @param parentElem an element containing paragraphs.
     * @param vers the version of the element.
     * @param ooMode whether to use the OO way or the standard way.
     * @return the parsed text value.
     * @see #getChildrenCharacterContent(Element, XMLFormatVersion, boolean, boolean)
     */
    static public String getChildrenCharacterContent(final Element parentElem, final XMLFormatVersion vers, final boolean ooMode) {
        // plain new lines only
        final boolean useSeparator = false;
        return getChildrenCharacterContent(parentElem, vers, ooMode, useSeparator);
    }

    /**
     * Parse each paragraph (or heading) directly under the passed element and join the results
     * into one string.
     * 
     * @param parentElem an element containing paragraphs.
     * @param vers the version of the element.
     * @param ooMode whether to use the OO way or the standard way.
     * @param useSeparator if <code>true</code> line-breaks are returned as {@value #LINE_SEPARATOR}
     *        and paragraphs are joined with {@value #PARAGRAPH_SEPARATOR}, if <code>false</code>
     *        <code>'\n'</code> is used for both.
     * @return the parsed text value.
     * @see #getCharacterContent(Element, XMLFormatVersion, boolean, boolean)
     */
    static public String getChildrenCharacterContent(final Element parentElem, final XMLFormatVersion vers, final boolean ooMode, final boolean useSeparator) {
        final String paragraphDelimiter;
        if (useSeparator)
            paragraphDelimiter = PARAGRAPH_SEPARATOR;
        else
            paragraphDelimiter = "\n";
        final List<String> paragraphs = getChildrenCharacterContent(parentElem, vers, ooMode, useSeparator, null);
        return CollectionUtils.join(paragraphs, paragraphDelimiter);
    }

    /**
     * Parse every direct child of the passed element that is named "p" or "h" and whose namespace
     * prefix is "text". Other children are ignored.
     * 
     * @param parentElem an element containing paragraphs.
     * @param vers the version of the element.
     * @param ooMode whether to use the OO way or the standard way.
     * @param useSeparator whether line-breaks are returned as {@value #LINE_SEPARATOR} instead of
     *        <code>'\n'</code>.
     * @param option how to parse each paragraph, can be <code>null</code>.
     * @return one string per paragraph, in document order.
     */
    static private List<String> getChildrenCharacterContent(final Element parentElem, final XMLFormatVersion vers, final boolean ooMode, final boolean useSeparator, final Option option) {
        final List<String> paragraphs = new ArrayList<String>();
        for (final Object item : parentElem.getChildren()) {
            final Element candidate = (Element) item;
            final String localName = candidate.getName();
            final boolean paragraphOrHeading = localName.equals("p") || localName.equals("h");
            if (!paragraphOrHeading || !candidate.getNamespacePrefix().equals("text"))
                continue;
            @SuppressWarnings("unchecked")
            final List<Content> paragraphContent = candidate.getContent();
            paragraphs.add(getCharacterContent(paragraphContent, vers, ooMode, useSeparator, option));
        }
        return paragraphs;
    }

    /**
     * Count the lines of the passed element : each paragraph is one line and each line-break
     * inside a paragraph adds another one.
     * 
     * @param parentElem an element containing paragraphs.
     * @param vers the version of the element.
     * @param ooMode whether to use the OO way or the standard way.
     * @return 0 if the element contains no paragraphs, otherwise the number of paragraphs and line
     *         breaks.
     */
    static public int getLinesCount(final Element parentElem, final XMLFormatVersion vers, final boolean ooMode) {
        // with ONLY_SEP each returned string only contains one character per line-break
        final List<String> lineBreaksPerParagraph = getChildrenCharacterContent(parentElem, vers, ooMode, false, Option.ONLY_SEP);
        // one line for each paragraph...
        int count = lineBreaksPerParagraph.size();
        // ...plus one for each line break
        for (final String lineBreaks : lineBreaksPerParagraph) {
            count += lineBreaks.length();
        }
        return count;
    }

    /**
     * Return the text value of the passed element, with line-breaks rendered as
     * <code>'\n'</code>. This method doesn't just return the XML text content, it also parses XML
     * elements (like tabs and line-breaks). For the differences between the OO way (as of 3.1) and
     * the OpenDocument way see section 5.1.1 White-space Characters of OpenDocument-v1.0-os and
     * §6.1.2 of OpenDocument-v1.2-part1. In essence OpenOffice never trims strings.
     * 
     * @param pElem a text element, e.g. text:p or text:h.
     * @param vers the version of the element.
     * @param ooMode whether to use the OO way or the standard way.
     * @return the parsed text value.
     * @see #getCharacterContent(Element, XMLFormatVersion, boolean, boolean)
     */
    static public final String getCharacterContent(final Element pElem, final XMLFormatVersion vers, final boolean ooMode) {
        // plain new lines only
        final boolean useSeparator = false;
        return getCharacterContent(pElem, vers, ooMode, useSeparator);
    }

    /**
     * Return the text value of the passed element. This method doesn't just return the XML text
     * content, it also parses XML elements (like tabs and line-breaks). For the differences
     * between the OO way (as of 3.1) and the OpenDocument way see section 5.1.1 White-space
     * Characters of OpenDocument-v1.0-os and §6.1.2 of OpenDocument-v1.2-part1. In essence
     * OpenOffice never trims strings.
     * 
     * @param pElem a text element, e.g. text:p or text:h.
     * @param vers the version of the element.
     * @param ooMode whether to use the OO way or the standard way.
     * @param useSeparator if <code>true</code> line-breaks are returned as {@value #LINE_SEPARATOR}
     *        otherwise <code>'\n'</code> is used.
     * @return the parsed text value.
     */
    @SuppressWarnings("unchecked")
    static public final String getCharacterContent(final Element pElem, final XMLFormatVersion vers, final boolean ooMode, final boolean useSeparator) {
        final List<Content> wholeContent = pElem.getContent();
        // no option : parse everything
        final Option noOption = null;
        return getCharacterContent(wholeContent, vers, ooMode, useSeparator, noOption);
    }

    /**
     * Restricts what the private content parser collects. Passing <code>null</code> instead of
     * an option means that the whole text is parsed.
     */
    private static enum Option {
        // STOP_AT_FIRST_CHAR : return as soon as at least one character was collected
        // ONLY_SEP : only collect line-breaks, text, tabs and spaces are skipped
        STOP_AT_FIRST_CHAR, ONLY_SEP
    }

    /**
     * Build the text value of the passed content by walking all its descendants, except those of
     * elements whose namespace prefix is "draw".
     * <ul>
     * <li>runs of white space in text nodes are collapsed to one space ;</li>
     * <li>unless <code>ooMode</code>, a leading space of a text node is dropped if nothing was
     * collected yet or if the previous text node ended with a space, and one trailing space is
     * dropped at the end ;</li>
     * <li>tab elements become <code>'\t'</code>, line-breaks become {@value #LINE_SEPARATOR} or
     * <code>'\n'</code> and <code>text:s</code> elements become as many spaces as their "c"
     * attribute (1 if missing).</li>
     * </ul>
     * 
     * @param pElem the content to parse, e.g. the content of a text:p.
     * @param vers the version of the content.
     * @param ooMode whether to use the OO way (no trimming) or the standard way.
     * @param useSeparator if <code>true</code> line-breaks are returned as {@value #LINE_SEPARATOR}
     *        otherwise <code>'\n'</code> is used.
     * @param option <code>null</code> to parse everything, {@link Option#ONLY_SEP} to only collect
     *        line-breaks, {@link Option#STOP_AT_FIRST_CHAR} to return as soon as the result is not
     *        empty.
     * @return the parsed text value, never <code>null</code>.
     * @throws NumberFormatException if the "c" attribute of a <code>text:s</code> isn't an integer.
     */
    static private final String getCharacterContent(final List<Content> pElem, final XMLFormatVersion vers, final boolean ooMode, final boolean useSeparator, final Option option) {
        if (pElem.isEmpty())
            return "";
        final OOXML xml = OOXML.get(vers, false);

        final StringBuilder sb = new StringBuilder();
        final Namespace textNS = xml.getVersion().getTEXT();
        final Element tabElem = xml.getTab();
        final Element newLineElem = xml.getLineBreak();
        // doesn't change during the iteration
        final char lineBreakChar = useSeparator ? LINE_SEPARATOR_CHAR : '\n';
        // true if the last text node ended with a space, i.e. a space that wasn't expanded from
        // an XML element (e.g. <tab/> or <text:s/>)
        boolean spaceSuffix = false;
        final Iterator<?> iter = new DescendantIterator(pElem, new IPredicate<Content>() {
            @Override
            public boolean evaluateChecked(Content input) {
                if (input instanceof Element) {
                    // don't descend into frames, graphical shapes...
                    return !((Element) input).getNamespace().getPrefix().equals("draw");
                }
                return true;
            }
        });
        while (iter.hasNext()) {
            final Object o = iter.next();
            if (option == Option.ONLY_SEP) {
                if (o instanceof Element && JDOMUtils.equals((Element) o, newLineElem)) {
                    sb.append(lineBreakChar);
                }
            } else {
                if (o instanceof Text) {
                    final String text = multiSpacePattern.matcher(((Text) o).getText()).replaceAll(" ");
                    // trim leading
                    if (!ooMode && text.startsWith(" ") && (spaceSuffix || sb.length() == 0))
                        sb.append(text.substring(1));
                    else
                        sb.append(text);
                    spaceSuffix = text.endsWith(" ");
                } else if (o instanceof Element) {
                    // perhaps handle conditions (conditional-text, hiddenparagraph, hidden-text)
                    final Element elem = (Element) o;
                    if (JDOMUtils.equals(elem, tabElem)) {
                        sb.append('\t');
                    } else if (JDOMUtils.equals(elem, newLineElem)) {
                        sb.append(lineBreakChar);
                    } else if (elem.getName().equals("s") && elem.getNamespace().equals(textNS)) {
                        final int count = Integer.parseInt(elem.getAttributeValue("c", textNS, "1"));
                        final char[] toAdd = new char[count];
                        Arrays.fill(toAdd, ' ');
                        sb.append(toAdd);
                    }
                }
            }
            if (option == Option.STOP_AT_FIRST_CHAR && sb.length() > 0)
                return sb.toString();
        }
        // trim trailing
        if (option != Option.ONLY_SEP && !ooMode && spaceSuffix) {
            // spaceSuffix only describes the last text node, not the buffer : the buffer can be
            // empty (a white space only text node trimmed as leading) or end with a tab or a
            // line-break appended after the last text node. So check the buffer itself to avoid an
            // out of bounds exception and to never remove a character that isn't a space.
            final int lastIndex = sb.length() - 1;
            if (lastIndex >= 0 && sb.charAt(lastIndex) == ' ')
                sb.deleteCharAt(lastIndex);
        }

        return sb.toString();
    }

    /**
     * Find the span holding the whole text of the passed element.
     * 
     * @param pElem a text element, e.g. text:p or text:h.
     * @param vers the version of the element.
     * @param ooMode whether to use the OO way or the standard way.
     * @return the one and only direct child span, provided that the content before and after it
     *         has no text value, <code>null</code> otherwise.
     */
    @SuppressWarnings("unchecked")
    static public final Element getWholeSpan(final Element pElem, final XMLFormatVersion vers, final boolean ooMode) {
        final Iterator<Element> spans = pElem.getContent(TextNodeDesc.get(Span.class).getFilter(vers)).iterator();
        // no span at all
        if (!spans.hasNext())
            return null;
        final Element onlySpan = spans.next();
        // more than one span
        if (spans.hasNext())
            return null;

        final int spanIndex = pElem.indexOf(onlySpan);
        // some text before the span
        final List<Content> before = pElem.getContent().subList(0, spanIndex);
        if (getCharacterContent(before, vers, ooMode, false, Option.STOP_AT_FIRST_CHAR).length() > 0)
            return null;
        // some text after the span
        final List<Content> after = pElem.getContent().subList(spanIndex + 1, pElem.getContentSize());
        if (getCharacterContent(after, vers, ooMode, false, Option.STOP_AT_FIRST_CHAR).length() > 0)
            return null;
        return onlySpan;
    }

    // the format version of this node, set by the constructor and never null
    private final XMLFormatVersion version;
    // the document this node belongs to, null while it isn't attached to any document
    protected ODDocument parent;

    /**
     * Create a node that doesn't belong to any document.
     * 
     * @param local the XML element of this node.
     * @param styleClass the class of the style of this node.
     * @param vers the version of <code>local</code>, a <code>null</code> value results in a
     *        {@link NullPointerException} since there's no document to ask.
     */
    // not public since, local element cannot be checked against vers
    protected TextNode(final Element local, final Class<S> styleClass, final XMLFormatVersion vers) {
        this(local, styleClass, vers, null);
    }

    /**
     * Create a node belonging to the passed document, the version of the node is the one of the
     * document.
     * 
     * @param local the XML element of this node, it must already be in <code>parent</code>.
     * @param styleClass the class of the style of this node.
     * @param parent the document of this node, a <code>null</code> value results in a
     *        {@link NullPointerException} since it is asked for its version.
     * @throws IllegalArgumentException if <code>local</code> isn't in a file of the package of
     *         <code>parent</code>.
     */
    protected TextNode(final Element local, final Class<S> styleClass, final ODDocument parent) {
        this(local, styleClass, null, parent);
    }

    private TextNode(final Element local, final Class<S> styleClass, final XMLFormatVersion vers, final ODDocument parent) {
        super(local, styleClass);
        this.version = vers == null ? parent.getFormatVersion() : vers;
        if (this.version == null)
            throw new NullPointerException("No version");
        this.setDocument(parent);
    }

    /**
     * The format version of this node.
     * 
     * @return the version, never <code>null</code>.
     */
    public final XMLFormatVersion getVersion() {
        return this.version;
    }

    /**
     * The document this node belongs to.
     * 
     * @return the document, <code>null</code> if this node isn't attached to any document.
     */
    @Override
    public final ODDocument getODDocument() {
        return this.parent;
    }

    /**
     * Remove this node from its document. If this node has a document, its element is detached
     * from its XML parent and {@link #getODDocument()} then returns <code>null</code>. If it has
     * none, nothing is done.
     */
    public final void detach() {
        this.setDocument(null);
    }

    /**
     * Change the document of this node. Passing <code>null</code> detaches the element from its
     * XML parent, passing a document only records it : the element must already be in one of the
     * files of its package.
     * 
     * @param doc the new document, can be <code>null</code>.
     * @throws IllegalArgumentException if the element of this node isn't in <code>doc</code>.
     */
    private final void setDocument(ODDocument doc) {
        // nothing to change
        if (doc == this.parent)
            return;

        if (doc == null) {
            this.getElement().detach();
            this.parent = null;
            return;
        }

        if (doc.getPackage().getXMLFile(this.getElement().getDocument()) == null)
            throw new IllegalArgumentException("Not already in the passed document");
        this.parent = doc;
    }

    /**
     * Add this node to the passed document, after having checked that it can be added.
     * 
     * @param doc the document to add to, <code>null</code> to {@link #detach() detach} this node.
     * @param where the element that will contain this node, ignored if <code>doc</code> is
     *        <code>null</code>.
     * @param index where to add in the content of <code>where</code>, negative to add at the end.
     * @throws IllegalArgumentException if this node cannot be added, see
     *         {@link #checkDocument(ODPackage, Element)}.
     */
    public final void addToDocument(ODDocument doc, Element where, int index) {
        if (doc == null) {
            this.detach();
            return;
        }

        this.checkDocument(doc.getPackage(), where);
        final Element self = this.getElement();
        if (index >= 0)
            where.addContent(index, self);
        else
            where.addContent(self);
        this.setDocument(doc);
    }

    /**
     * Check that this node can be added under the passed element : the versions must match,
     * <code>where</code> must be in a file of the package and every style referenced by this node
     * must be resolvable.
     * 
     * @param pkg the package this node will be added to.
     * @param where the element that will contain this node.
     * @throws IllegalArgumentException if one of the checks fails.
     */
    protected final void checkDocument(final ODPackage pkg, final Element where) {
        final XMLFormatVersion pkgVersion = pkg.getFormatVersion();
        if (!pkgVersion.equals(this.getVersion()))
            throw new IllegalArgumentException("Version mismatch : " + this.getVersion() + " != " + pkgVersion);

        if (pkg.getXMLFile(where.getDocument()) == null)
            throw new IllegalArgumentException("Where element not in the passed package");

        // the style of this node
        final String styleName = this.getStyleName();
        if (styleName != null && getStyle(pkg, where.getDocument()) == null)
            throw new IllegalArgumentException("unknown style " + styleName + " in " + pkg);

        // the styles referenced by any attribute
        for (final Attribute attr : SimpleXMLPath.allAttributes().selectNodes(getElement())) {
            final ResolveResult resolved = Style.resolveReference(pkg, where.getDocument(), attr);
            if (resolved == ResolveResult.NOT_RESOLVED)
                throw new IllegalArgumentException(this + " is using an undefined style : " + attr);
        }
    }

    /**
     * Append a tab element to the content of this node.
     */
    public final void addTab() {
        final OOXML xml = OOXML.get(this.getVersion());
        final Element tab = xml.getTab();
        this.getElement().addContent(tab);
    }

    /**
     * Append the passed text to the content of this node, the text is first encoded by
     * <code>OOXML.encodeWSasList()</code> for the version of this node.
     * 
     * @param text the text to append.
     */
    public final void addContent(String text) {
        final OOXML xml = OOXML.get(this.getVersion());
        this.getElement().addContent(xml.encodeWSasList(text));
    }

    /**
     * Append a new span containing the passed text.
     * 
     * @param text the text of the new span.
     * @param styleName the name of the style of the new span.
     * @return the added span.
     */
    public final Span addStyledContent(String text, String styleName) {
        // the element must be added before being wrapped
        final Element spanElem = Span.createEmpty(this.getVersion());
        this.getElement().addContent(spanElem);

        final Span span = this.createSpan(spanElem);
        span.addContent(text);
        span.setStyleName(styleName);
        return span;
    }

    // wrap the passed element in a Span, using the document of this node if it has one, its
    // version otherwise.
    private Span createSpan(final Element elem) {
        final TextNodeDesc<Span> spanDesc = TextNodeDesc.get(Span.class);
        final ODDocument doc = this.getODDocument();
        if (doc != null)
            return spanDesc.wrapNode(doc, elem);
        return spanDesc.wrapNode(getVersion(), elem);
    }

    /**
     * Return the text value of this node, using the mode returned by
     * <code>Cell.getTextValueMode()</code>.
     * 
     * @return the parsed text value.
     * @see #getCharacterContent(boolean)
     */
    public final String getCharacterContent() {
        final boolean ooMode = Cell.getTextValueMode();
        return this.getCharacterContent(ooMode);
    }

    /**
     * Return the text value of this node, with line-breaks rendered as <code>'\n'</code>.
     * 
     * @param ooMode whether to use the OO way or the standard way.
     * @return the parsed text value.
     * @see #getCharacterContent(Element, XMLFormatVersion, boolean, boolean)
     */
    public final String getCharacterContent(final boolean ooMode) {
        final Element elem = this.getElement();
        // false : plain new lines, no Unicode separators
        return getCharacterContent(elem, this.getVersion(), ooMode, false);
    }

    /**
     * The spans that are children of this node.
     * 
     * @return the children, created with the document of this node if it has one, with its
     *         version otherwise.
     */
    public final Children<Span> getSpans() {
        final TextNodeDesc<Span> spanDesc = TextNodeDesc.get(Span.class);
        // perhaps Children should get the document dynamically (since it can change)
        final ODDocument doc = this.getODDocument();
        if (doc != null)
            return spanDesc.getChildren(doc, getElement());
        return spanDesc.getChildren(getVersion(), getElement());
    }
}