Logo Search packages:      
Sourcecode: jabref version File versions  Download package

AuthorList.java

package net.sf.jabref;

import net.sf.jabref.export.layout.format.CreateDocBookAuthors;

import java.util.Vector;
import java.util.WeakHashMap;

/**
 * This is an immutable class representing information of either <CODE>author</CODE>
 * or <CODE>editor</CODE> field in bibtex record.
 * <p>
 * Constructor performs parsing of raw field text and stores preformatted data.
 * Various accessor methods return author/editor field in different formats.
 * <p>
 * Parsing algorithm is designed to satisfy two requirements: (a) when author's
 * name is typed correctly, the result should coincide with the one of BiBTeX;
 * (b) for erroneous names, output should be reasonable (but may differ from
 * BiBTeX output). The following rules are used:
 * <ol>
 * <li> 'author field' is a sequence of tokens;
 * <ul>
 * <li> tokens are separated by sequences of whitespaces (<CODE>Character.isWhitespace(c)==true</CODE>),
 * commas (,), dashes (-), and tildas (~);
 * <li> every comma separates tokens, while sequences of other separators are
 * equivalent to a single separator; for example: "a - b" consists of 2 tokens
 * ("a" and "b"), while "a,-,b" consists of 3 tokens ("a", "", and "b")
 * <li> anything enclosed in braces belonges to a single token; for example:
 * "abc x{a,b,-~ c}x" consists of 2 tokens, while "abc xa,b,-~ cx" consists of 4
 * tokens ("abc", "xa","b", and "cx");
 * <li> a token followed immediately by a dash is "dash-terminated" token, and
 * all other tokens are "space-terminated" tokens; for example: in "a-b- c - d"
 * tokens "a" and "b" are dash-terminated and "c" and "d" are space-terminated;
 * <li> for the purposes of splitting of 'author name' into parts and
 * construction of abbreviation of first name, one needs definitions of first
 * latter of a token, case of a token, and abbreviation of a token:
 * <ul>
 * <li> 'first letter' of a token is the first letter character (<CODE>Character.isLetter(c)==true</CODE>)
 * that does not belong to a sequence of letters that immediately follows "\"
 * character, with one exception: if "\" is followed by "aa", "AA", "ae", "AE",
 * "l", "L", "o", "O", "oe", "OE", "i", or "j" followed by non-letter, the
 * 'first letter' of a token is a letter that follows "\"; for example: in
 * "a{x}b" 'first letter' is "a", in "{\"{U}}bel" 'first letter' is "U", in
 * "{\noopsort{\"o}}xyz" 'first letter' is "o", in "{\AE}x" 'first letter' is
 * "A", in "\aex\ijk\Oe\j" 'first letter' is "j"; if there is no letter
 * satisfying the above rule, 'first letter' is undefined;
 * <li> token is "lower-case" token, if its first letter id defined and is
 * lower-case (<CODE>Character.isLowerCase(c)==true</CODE>), and token is
 * "upper-case" token otherwise;
 * <li> 'abbreviation' of a token is the shortest prefix of the token that (a)
 * contains 'first letter' and (b) is braces-balanced; if 'first letter' is
 * undefined, 'abbreviation' is the token itself; in the above examples,
 * 'abbreviation's are "a", "{\"{U}}", "{\noopsort{\"o}}", "{\AE}",
 * "\aex\ijk\Oe\j";
 * </ul>
 * <li> the behavior based on the above definitions will be erroneous only in
 * one case: if the first-name-token is "{\noopsort{A}}john", we abbreviate it
 * as "{\noopsort{A}}.", while BiBTeX produces "j."; fixing this problem,
 * however, requires processing of the preabmle;
 * </ul>
 * <li> 'author name's in 'author field' are subsequences of tokens separated by
 * token "and" ("and" is case-insensitive); if 'author name' is an empty
 * sequence of tokens, it is ignored; for examle, both "John Smith and Peter
 * Black" and "and and John Smith and and Peter Black" consists of 2 'author
 * name's "Johm Smith" and "Peter Black" (in erroneous situations, this is a bit
 * different from BiBTeX behavior);
 * <li> 'author name' consists of 'first-part', 'von-part', 'last-part', and
 * 'junior-part', each of which is a sequence of tokens; how a sequence of
 * tokens has to be splitted into these parts, depends the number of commas:
 * <ul>
 * <li> no commas, all tokens are upper-case: 'junior-part' and 'von-part' are
 * empty, 'last-part' consist of the last token, 'first-part' consists of all
 * other tokens ('first-part' is empty, if 'author name' consists of a single
 * token); for example, in "John James Smith", 'last-part'="Smith" and
 * 'first-part'="John James";
 * <li> no commas, there exists lower-case token: 'junior-part' is empty,
 * 'first-part' consists of all upper-case tokens before the first lower-case
 * token, 'von-part' consists of lower-case tokens starting the first lower-case
 * token and ending the lower-case token that is followed by upper-case token,
 * 'last-part' consists of the rest of tokens; note that both 'first-part' and
 * 'latst-part' may be empty and 'last-part' may contain lower-case tokens; for
 * example: in "von der", 'first-part'='last-part'="", 'von-part'="von der"; in
 * "Charles Louis Xavier Joseph de la Vall{\'e}e la Poussin",
 * 'first-part'="Charles Louis Xavier Joseph", 'von-part'="de la",
 * 'last-part'="Vall{\'e}e la Poussin";
 * <li> one comma: 'junior-part' is empty, 'first-part' consists of all tokens
 * after comma, 'von-part' consists of the longest sequence of lower-case tokens
 * in the very beginning, 'last-part' consists of all tokens after 'von-part'
 * and before comma; note that any part can be empty; for example: in "de la
 * Vall{\'e}e la Poussin, Charles Louis Xavier Joseph", 'first-part'="Charles
 * Louis Xavier Joseph", 'von-part'="de la", 'last-part'="Vall{\'e}e la
 * Poussin"; in "Joseph de la Vall{\'e}e la Poussin, Charles Louis Xavier",
 * 'first-part'="Charles Louis Xavier", 'von-part'="", 'last-part'="Joseph de la
 * Vall{\'e}e la Poussin";
 * <li> two or more commas (any comma after the second one is ignored; it merely
 * separates tokens): 'junior-part' consists of all tokens between first and
 * second commas, 'first-part' consists of all tokens after the second comma,
 * tokens before the first comma are splitted into 'von-part' and 'last-part'
 * similarly to the case of one comma; for example: in "de la Vall{\'e}e
 * Poussin, Jr., Charles Louis Xavier Joseph", 'first-part'="Charles Louis
 * Xavier Joseph", 'von-part'="de la", 'last-part'="Vall{\'e}e la Poussin", and
 * 'junior-part'="Jr.";
 * </ul>
 * <li> when 'first-part', 'last-part', 'von-part', or 'junior-part' is
 * reconstructed from tokens, tokens in a part are separated either by space or
 * by dash, depending on whether the token before the separator was
 * space-terminated or dash-terminated; for the last token in a part it does not
 * matter whether it was dash- or space-terminated;
 * <li> when 'first-part' is abbreviated, each token is replaced by its
 * abbreviation followed by a period; separators are the same as in the case of
 * non-abbreviated name; for example: in "Heinrich-{\"{U}}bel Kurt von Minich",
 * 'first-part'="Heinrich-{\"{U}}bel Kurt", and its abbreviation is "H.-{\"{U}}.
 * K."
 * </ol>
 * 
 * @see tests.net.sf.jabref.AuthorListTest Testcases for this class.
 */
00117 public class AuthorList {

      private Vector<Author> authors; 

      // Variables for storing computed strings, so they only need be created
      // once:
      private String authorsNatbib = null, authorsFirstFirstAnds = null,
            authorsAlph = null;

      private String[] authorsFirstFirst = new String[4], authorsLastOnly = new String[2],
      authorLastFirstAnds = new String[2], 
      authorsLastFirst = new String[4],
    authorsLastFirstFirstLast = new String[2];


      // The following variables are used only during parsing

      private String orig; // the raw bibtex author/editor field

      // the following variables are updated by getToken procedure
      private int token_start; // index in orig

      private int token_end; // to point 'abc' in ' abc xyz', start=2 and end=5

      // the following variables are valid only if getToken returns TOKEN_WORD
      private int token_abbr; // end of token abbreviation (always: token_start <

      // token_abbr <= token_end)

      private char token_term; // either space or dash

      private boolean token_case; // true if upper-case token, false if lower-case

      // token

      // Tokens of one author name.
      // Each token occupies TGL consecutive entries in this vector (as described
      // below)
      private Vector<Object> tokens;

      private static final int TOKEN_GROUP_LENGTH = 4; // number of entries for

      // a token

      // the following are offsets of an entry in a group of entries for one token
      private static final int OFFSET_TOKEN = 0; // String -- token itself;

      private static final int OFFSET_TOKEN_ABBR = 1; // String -- token

      // abbreviation;

      private static final int OFFSET_TOKEN_TERM = 2; // Character -- token

      // terminator (either " " or
      // "-")

      // private static final int OFFSET_TOKEN_CASE = 3; // Boolean --
      // true=uppercase, false=lowercase
      // the following are indices in 'tokens' vector created during parsing of
      // author name
      // and later used to properly split author name into parts
      int von_start, // first lower-case token (-1 if all tokens upper-case)
            last_start, // first upper-case token after first lower-case token (-1
            // if does not exist)
            comma_first, // token after first comma (-1 if no commas)
            comma_second; // token after second comma (-1 if no commas or only one

      // comma)

      // Token types (returned by getToken procedure)
      private static final int TOKEN_EOF = 0;

      private static final int TOKEN_AND = 1;

      private static final int TOKEN_COMMA = 2;

      private static final int TOKEN_WORD = 3;

      // Constant Hashtable containing names of TeX special characters
      private static final java.util.HashSet<String> tex_names = new java.util.HashSet<String>();
      // and static constructor to initialize it
      static {
            tex_names.add("aa");
            tex_names.add("ae");
            tex_names.add("l");
            tex_names.add("o");
            tex_names.add("oe");
            tex_names.add("i");
            tex_names.add("AA");
            tex_names.add("AE");
            tex_names.add("L");
            tex_names.add("O");
            tex_names.add("OE");
            tex_names.add("j");
      }

      static WeakHashMap<String, AuthorList> authorCache = new WeakHashMap<String, AuthorList>();

      /**
       * Parses the parameter strings and stores preformatted author information.
       * 
       * Don't call this constructor directly but rather use the getAuthorList()
       * method which caches its results.
       * 
       * @param bibtex_authors
       *            contents of either <CODE>author</CODE> or <CODE>editor</CODE>
       *            bibtex field.
       */
00225       protected AuthorList(String bibtex_authors) {
            authors = new Vector<Author>(5); // 5 seems to be reasonable initial size
            orig = bibtex_authors; // initialization
            token_start = 0;
            token_end = 0; // of parser
            while (token_start < orig.length()) {
                  Author author = getAuthor();
                  if (author != null)
                        authors.add(author);
            }
            // clean-up
            orig = null;
            tokens = null;
      }

      /**
       * Retrieve an AuthorList for the given string of authors or editors.
       * 
       * This function tries to cache AuthorLists by string passed in.
       * 
       * @param authors
       *            The string of authors or editors in bibtex format to parse.
       * @return An AuthorList object representing the given authors.
       */
00249       public static AuthorList getAuthorList(String authors) {
            AuthorList authorList = authorCache.get(authors);
            if (authorList == null) {
                  authorList = new AuthorList(authors);
                  authorCache.put(authors, authorList);
            }
            return authorList;
      }

      /**
       * This is a convenience method for getAuthorsFirstFirst()
       * 
       * @see net.sf.jabref.AuthorList#getAuthorsFirstFirst
       */
00263       public static String fixAuthor_firstNameFirstCommas(String authors, boolean abbr,
            boolean oxfordComma) {
            return getAuthorList(authors).getAuthorsFirstFirst(abbr, oxfordComma);
      }

      /**
       * This is a convenience method for getAuthorsFirstFirstAnds()
       * 
       * @see net.sf.jabref.AuthorList#getAuthorsFirstFirstAnds
       */
00273       public static String fixAuthor_firstNameFirst(String authors) {
            return getAuthorList(authors).getAuthorsFirstFirstAnds();
      }

      /**
       * This is a convenience method for getAuthorsLastFirst()
       * 
       * @see net.sf.jabref.AuthorList#getAuthorsLastFirst
       */
00282       public static String fixAuthor_lastNameFirstCommas(String authors, boolean abbr,
            boolean oxfordComma) {
            return getAuthorList(authors).getAuthorsLastFirst(abbr, oxfordComma);
      }

      /**
       * This is a convenience method for getAuthorsLastFirstAnds(true)
       * 
       * @see net.sf.jabref.AuthorList#getAuthorsLastFirstAnds
       */
00292       public static String fixAuthor_lastNameFirst(String authors) {
            return getAuthorList(authors).getAuthorsLastFirstAnds(false);
      }
      
      /**
       * This is a convenience method for getAuthorsLastFirstAnds()
       * 
       * @see net.sf.jabref.AuthorList#getAuthorsLastFirstAnds
       */
00301       public static String fixAuthor_lastNameFirst(String authors, boolean abbreviate) {
            return getAuthorList(authors).getAuthorsLastFirstAnds(abbreviate);
      }

      /**
       * This is a convenience method for getAuthorsLastOnly()
       * 
       * @see net.sf.jabref.AuthorList#getAuthorsLastOnly
       */
00310       public static String fixAuthor_lastNameOnlyCommas(String authors, boolean oxfordComma) {
            return getAuthorList(authors).getAuthorsLastOnly(oxfordComma);
      }

      /**
       * This is a convenience method for getAuthorsForAlphabetization()
       * 
       * @see net.sf.jabref.AuthorList#getAuthorsForAlphabetization
       */
00319       public static String fixAuthorForAlphabetization(String authors) {
            return getAuthorList(authors).getAuthorsForAlphabetization();
      }

      /**
       * This is a convenience method for getAuthorsNatbib()
       * 
       * @see net.sf.jabref.AuthorList#getAuthorsNatbib
       */
00328       public static String fixAuthor_Natbib(String authors) {
            return AuthorList.getAuthorList(authors).getAuthorsNatbib();
      }

      /**
       * Parses one author name and returns preformatted information.
       * 
       * @return Preformatted author name; <CODE>null</CODE> if author name is
       *         empty.
       */
00338       private Author getAuthor() {

            tokens = new Vector<Object>(); // initialization
            von_start = -1;
            last_start = -1;
            comma_first = -1;
            comma_second = -1;

            // First step: collect tokens in 'tokens' Vector and calculate indices
            token_loop: while (true) {
                  int token = getToken();
                  cases: switch (token) {
                  case TOKEN_EOF:
                  case TOKEN_AND:
                        break token_loop;
                  case TOKEN_COMMA:
                        if (comma_first < 0)
                              comma_first = tokens.size();
                        else if (comma_second < 0)
                              comma_second = tokens.size();
                        break cases;
                  case TOKEN_WORD:
                        tokens.add(orig.substring(token_start, token_end));
                        tokens.add(orig.substring(token_start, token_abbr));
                        tokens.add(new Character(token_term));
                        tokens.add(Boolean.valueOf(token_case));
                        if (comma_first >= 0)
                              break cases;
                        if (last_start >= 0)
                              break cases;
                        if (von_start < 0) {
                              if (!token_case) {
                                    von_start = tokens.size() - TOKEN_GROUP_LENGTH;
                                    break cases;
                              }
                        } else if (last_start < 0 && token_case) {
                              last_start = tokens.size() - TOKEN_GROUP_LENGTH;
                              break cases;
                        }
                  }
            }// end token_loop

            // Second step: split name into parts (here: calculate indices
            // of parts in 'tokens' Vector)
            if (tokens.size() == 0)
                  return null; // no author information


            // the following negatives indicate absence of the corresponding part
            int first_part_start = -1, von_part_start = -1, last_part_start = -1, jr_part_start = -1;
            int first_part_end = 0, von_part_end = 0, last_part_end = 0, jr_part_end = 0;
        boolean jrAsFirstname = false;
            if (comma_first < 0) { // no commas
                  if (von_start < 0) { // no 'von part'
                        last_part_end = tokens.size();
                        last_part_start = tokens.size() - TOKEN_GROUP_LENGTH;
                        int index = tokens.size() - 2 * TOKEN_GROUP_LENGTH + OFFSET_TOKEN_TERM;
                        if (index > 0) {
                              Character ch = (Character)tokens.elementAt(index);
                              if (ch.charValue() == '-')
                                    last_part_start -= TOKEN_GROUP_LENGTH;
                        }
                        first_part_end = last_part_start;
                        if (first_part_end > 0) {
                              first_part_start = 0;
                        }
                  } else { // 'von part' is present
                        if (last_start >= 0) {
                              last_part_end = tokens.size();
                              last_part_start = last_start;
                              von_part_end = last_part_start;
                        } else {
                              von_part_end = tokens.size();
                        }
                        von_part_start = von_start;
                        first_part_end = von_part_start;
                        if (first_part_end > 0)
                              first_part_start = 0;
                  }
            } else { // commas are present: it affects only 'first part' and
                  // 'junior part'
                  first_part_end = tokens.size();
                  if (comma_second < 0) { // one comma
                        if (comma_first < first_part_end) {
                    first_part_start = comma_first;
                    //if (((String)tokens.get(first_part_start)).toLowerCase().startsWith("jr."))
                    //    jrAsFirstname = true;
                }
                  } else { // two or more commas
                        if (comma_second < first_part_end)
                              first_part_start = comma_second;
                        jr_part_end = comma_second;
                        if (comma_first < jr_part_end)
                              jr_part_start = comma_first;
                  }
                  if (von_start != 0) { // no 'von part'
                        last_part_end = comma_first;
                        if (last_part_end > 0)
                              last_part_start = 0;
                  } else { // 'von part' is present
                        if (last_start < 0) {
                              von_part_end = comma_first;
                        } else {
                              last_part_end = comma_first;
                              last_part_start = last_start;
                              von_part_end = last_part_start;
                        }
                        von_part_start = 0;
                  }
            }

        if ((first_part_start == -1) && (last_part_start == -1) && (von_part_start != -1)) {
            // There is no first or last name, but we have a von part. This is likely
            // to indicate a single-entry name without an initial capital letter, such
            // as "unknown".
            // We make the von part the last name, to facilitate handling by last-name formatters:
            last_part_start = von_part_start;
            last_part_end = von_part_end;
            von_part_start = -1;
            von_part_end = -1;
        }
        if (jrAsFirstname) {
            // This variable, if set, indicates that the first name starts with "jr.", which
            // is an indication that we may have a name formatted as "Firstname Lastname, Jr."
            // which is an acceptable format for BibTeX.
        }

            // Third step: do actual splitting, construct Author object
            return new Author((first_part_start < 0 ? null : concatTokens(first_part_start,
                  first_part_end, OFFSET_TOKEN, false)), (first_part_start < 0 ? null : concatTokens(
                  first_part_start, first_part_end, OFFSET_TOKEN_ABBR, true)), (von_part_start < 0 ? null
                  : concatTokens(von_part_start, von_part_end, OFFSET_TOKEN, false)),
                  (last_part_start < 0 ? null : concatTokens(last_part_start, last_part_end,
                        OFFSET_TOKEN, false)), (jr_part_start < 0 ? null : concatTokens(jr_part_start,
                        jr_part_end, OFFSET_TOKEN, false)));
      }

      /**
       * Concatenates list of tokens from 'tokens' Vector. Tokens are separated by
       * spaces or dashes, dependeing on stored in 'tokens'. Callers always ensure
       * that start < end; thus, there exists at least one token to be
       * concatenated.
       * 
       * @param start
       *            index of the first token to be concatenated in 'tokens' Vector
       *            (always divisible by TOKEN_GROUP_LENGTH).
       * @param end
       *            index of the first token not to be concatenated in 'tokens'
       *            Vector (always divisible by TOKEN_GROUP_LENGTH).
       * @param offset
       *            offset within token group (used to request concatenation of
       *            either full tokens or abbreviation).
       * @param dot_after
       *            <CODE>true</CODE> -- add period after each token, <CODE>false</CODE> --
       *            do not add.
       * @return the result of concatenation.
       */
00495       private String concatTokens(int start, int end, int offset, boolean dot_after) {
            StringBuffer res = new StringBuffer();
            // Here we always have start < end
            res.append((String) tokens.get(start + offset));
            if (dot_after)
                  res.append('.');
            start += TOKEN_GROUP_LENGTH;
            while (start < end) {
                  res.append(tokens.get(start - TOKEN_GROUP_LENGTH + OFFSET_TOKEN_TERM));
                  res.append((String) tokens.get(start + offset));
                  if (dot_after)
                        res.append('.');
                  start += TOKEN_GROUP_LENGTH;
            }
            return res.toString();
      }

      /**
       * Parses the next token.
       * <p>
       * The string being parsed is stored in global variable <CODE>orig</CODE>,
       * and position which parsing has to start from is stored in global variable
       * <CODE>token_end</CODE>; thus, <CODE>token_end</CODE> has to be set
       * to 0 before the first invocation. Procedure updates <CODE>token_end</CODE>;
       * thus, subsequent invocations do not require any additional variable
       * settings.
       * <p>
       * The type of the token is returned; if it is <CODE>TOKEN_WORD</CODE>,
       * additional information is given in global variables <CODE>token_start</CODE>,
       * <CODE>token_end</CODE>, <CODE>token_abbr</CODE>, <CODE>token_term</CODE>,
       * and <CODE>token_case</CODE>; namely: <CODE>orig.substring(token_start,token_end)</CODE>
       * is the thext of the token, <CODE>orig.substring(token_start,token_abbr)</CODE>
       * is the token abbreviation, <CODE>token_term</CODE> contains token
       * terminator (space or dash), and <CODE>token_case</CODE> is <CODE>true</CODE>,
       * if token is upper-case and <CODE>false</CODE> if token is lower-case.
       * 
       * @return <CODE>TOKEN_EOF</CODE> -- no more tokens, <CODE>TOKEN_COMMA</CODE> --
       *         token is comma, <CODE>TOKEN_AND</CODE> -- token is the word
       *         "and" (or "And", or "aND", etc.), <CODE>TOKEN_WORD</CODE> --
       *         token is a word; additional information is given in global
       *         variables <CODE>token_start</CODE>, <CODE>token_end</CODE>,
       *         <CODE>token_abbr</CODE>, <CODE>token_term</CODE>, and
       *         <CODE>token_case</CODE>.
       */
00539       private int getToken() {
            token_start = token_end;
            while (token_start < orig.length()) {
                  char c = orig.charAt(token_start);
                  if (!(c == '~' || c == '-' || Character.isWhitespace(c)))
                        break;
                  token_start++;
            }
            token_end = token_start;
            if (token_start >= orig.length())
                  return TOKEN_EOF;
            if (orig.charAt(token_start) == ',') {
                  token_end++;
                  return TOKEN_COMMA;
            }
            token_abbr = -1;
            token_term = ' ';
            token_case = true;
            int braces_level = 0;
            int current_backslash = -1;
            boolean first_letter_is_found = false;
            while (token_end < orig.length()) {
                  char c = orig.charAt(token_end);
                  if (c == '{') {
                        braces_level++;
                  }
                  if (braces_level > 0)
                        if (c == '}')
                              braces_level--;
                  if (first_letter_is_found && token_abbr < 0 && braces_level == 0)
                        token_abbr = token_end;
                  if (!first_letter_is_found && current_backslash < 0 && Character.isLetter(c)) {
                        token_case = Character.isUpperCase(c);
                        first_letter_is_found = true;
                  }
                  if (current_backslash >= 0 && !Character.isLetter(c)) {
                        if (!first_letter_is_found) {
                              String tex_cmd_name = orig.substring(current_backslash + 1, token_end);
                              if (tex_names.contains(tex_cmd_name)) {
                                    token_case = Character.isUpperCase(tex_cmd_name.charAt(0));
                                    first_letter_is_found = true;
                              }
                        }
                        current_backslash = -1;
                  }
                  if (c == '\\')
                        current_backslash = token_end;
                  if (braces_level == 0)
                        if (c == ',' || c == '~' || c=='-' || Character.isWhitespace(c))
                              break;
                  // Morten Alver 18 Apr 2006: Removed check for hyphen '-' above to
                  // prevent
                  // problems with names like Bailey-Jones getting broken up and
                  // sorted wrong.
                  // Aaron Chen 14 Sep 2008: Enable hyphen check for first names like Chang-Chin
                  token_end++;
            }
            if (token_abbr < 0)
                  token_abbr = token_end;
            if (token_end < orig.length() && orig.charAt(token_end) == '-')
                  token_term = '-';
            if (orig.substring(token_start, token_end).equalsIgnoreCase("and"))
                  return TOKEN_AND;
            else
                  return TOKEN_WORD;
      }

      /**
       * Returns the number of author names in this object.
       * 
       * @return the number of author names in this object.
       */
00611       public int size() {
            return authors.size();
      }

      /**
       * Returns the <CODE>Author</CODE> object for the i-th author.
       * 
       * @param i
       *            Index of the author (from 0 to <CODE>size()-1</CODE>).
       * @return the <CODE>Author</CODE> object.
       */
00622       public Author getAuthor(int i) {
            return authors.get(i);
      }

      /**
       * Returns the list of authors in "natbib" format.
       * <p>
       * <ul>
       * <li>"John Smith" -> "Smith"</li>
       * <li>"John Smith and Black Brown, Peter" ==> "Smith and Black Brown"</li>
       * <li>"John von Neumann and John Smith and Black Brown, Peter" ==> "von
       * Neumann et al." </li>
       * </ul>
       * 
       * @return formatted list of authors.
       */
00638       public String getAuthorsNatbib() {
            // Check if we've computed this before:
            if (authorsNatbib != null)
                  return authorsNatbib;

            StringBuffer res = new StringBuffer();
            if (size() > 0) {
                  res.append(getAuthor(0).getLastOnly());
                  if (size() == 2) {
                        res.append(" and ");
                        res.append(getAuthor(1).getLastOnly());
                  } else if (size() > 2) {
                        res.append(" et al.");
                  }
            }
            authorsNatbib = res.toString();
            return authorsNatbib;
      }

      /**
       * Returns the list of authors separated by commas with last name only; If
       * the list consists of three or more authors, "and" is inserted before the
       * last author's name.
       * <p>
       * 
       * <ul>
       * <li> "John Smith" ==> "Smith"</li>
       * <li> "John Smith and Black Brown, Peter" ==> "Smith and Black Brown"</li>
       * <li> "John von Neumann and John Smith and Black Brown, Peter" ==> "von
       * Neumann, Smith and Black Brown".</li>
       * </ul>
       * 
       * @param oxfordComma
       *            Whether to put a comma before the and at the end.
       * 
       * @see http://en.wikipedia.org/wiki/Serial_comma For an detailed
       *      explaination about the Oxford comma.
       * 
       * @return formatted list of authors.
       */
00678       public String getAuthorsLastOnly(boolean oxfordComma) {
            int abbrInt = (oxfordComma ? 0 : 1);

            // Check if we've computed this before:
            if (authorsLastOnly[abbrInt] != null)
                  return authorsLastOnly[abbrInt];

            StringBuffer res = new StringBuffer();
            if (size() > 0) {
                  res.append(getAuthor(0).getLastOnly());
                  int i = 1;
                  while (i < size() - 1) {
                        res.append(", ");
                        res.append(getAuthor(i).getLastOnly());
                        i++;
                  }
                  if (size() > 2 && oxfordComma)
                        res.append(",");
                  if (size() > 1) {
                        res.append(" and ");
                        res.append(getAuthor(i).getLastOnly());
                  }
            }
            authorsLastOnly[abbrInt] = res.toString();
            return authorsLastOnly[abbrInt];
      }

      /**
       * Returns the list of authors separated by commas with first names after
       * last name; first names are abbreviated or not depending on parameter. If
       * the list consists of three or more authors, "and" is inserted before the
       * last author's name.
       * <p>
       * 
       * <ul>
       * <li> "John Smith" ==> "Smith, John" or "Smith, J."</li>
       * <li> "John Smith and Black Brown, Peter" ==> "Smith, John and Black
       * Brown, Peter" or "Smith, J. and Black Brown, P."</li>
       * <li> "John von Neumann and John Smith and Black Brown, Peter" ==> "von
       * Neumann, John, Smith, John and Black Brown, Peter" or "von Neumann, J.,
       * Smith, J. and Black Brown, P.".</li>
       * </ul>
       * 
       * @param abbreviate
       *            whether to abbreivate first names.
       * 
       * @param oxfordComma
       *            Whether to put a comma before the and at the end.
       * 
       * @see http://en.wikipedia.org/wiki/Serial_comma For an detailed
       *      explaination about the Oxford comma.
       * 
       * @return formatted list of authors.
       */
00732       public String getAuthorsLastFirst(boolean abbreviate, boolean oxfordComma) {
            int abbrInt = (abbreviate ? 0 : 1);
            abbrInt += (oxfordComma ? 0 : 2);

            // Check if we've computed this before:
            if (authorsLastFirst[abbrInt] != null)
                  return authorsLastFirst[abbrInt];

            StringBuffer res = new StringBuffer();
            if (size() > 0) {
                  res.append(getAuthor(0).getLastFirst(abbreviate));
                  int i = 1;
                  while (i < size() - 1) {
                        res.append(", ");
                        res.append(getAuthor(i).getLastFirst(abbreviate));
                        i++;
                  }
                  if (size() > 2 && oxfordComma)
                        res.append(",");
                  if (size() > 1) {
                        res.append(" and ");
                        res.append(getAuthor(i).getLastFirst(abbreviate));
                  }
            }
            authorsLastFirst[abbrInt] = res.toString();
            return authorsLastFirst[abbrInt];
      }
      
      public String toString(){
            return getAuthorsLastFirstAnds(false);
      }

      /**
       * Returns the list of authors separated by "and"s with first names after
       * last name; first names are not abbreviated.
       * <p>
       * <ul>
       * <li>"John Smith" ==> "Smith, John"</li>
       * <li>"John Smith and Black Brown, Peter" ==> "Smith, John and Black
       * Brown, Peter"</li>
       * <li>"John von Neumann and John Smith and Black Brown, Peter" ==> "von
       * Neumann, John and Smith, John and Black Brown, Peter".</li>
       * </ul>
       * 
       * @return formatted list of authors.
       */
00778       public String getAuthorsLastFirstAnds(boolean abbreviate) {
            int abbrInt = (abbreviate ? 0 : 1);
            // Check if we've computed this before:
            if (authorLastFirstAnds[abbrInt] != null)
                  return authorLastFirstAnds[abbrInt];

            StringBuffer res = new StringBuffer();
            if (size() > 0) {
                  res.append(getAuthor(0).getLastFirst(abbreviate));
                  for (int i = 1; i < size(); i++) {
                        res.append(" and ");
                        res.append(getAuthor(i).getLastFirst(abbreviate));
                  }
            }

            authorLastFirstAnds[abbrInt] = res.toString();
            return authorLastFirstAnds[abbrInt];
      }

      public String getAuthorsLastFirstFirstLastAnds(boolean abbreviate) {
            int abbrInt = (abbreviate ? 0 : 1);
            // Check if we've computed this before:
            if (authorsLastFirstFirstLast[abbrInt] != null)
                  return authorsLastFirstFirstLast[abbrInt];

            StringBuffer res = new StringBuffer();
            if (size() > 0) {
            res.append(getAuthor(0).getLastFirst(abbreviate));
                  for (int i = 1; i < size(); i++) {
                        res.append(" and ");
                        res.append(getAuthor(i).getFirstLast(abbreviate));
                  }
            }

            authorsLastFirstFirstLast[abbrInt] = res.toString();
            return authorsLastFirstFirstLast[abbrInt];
      }    

      /**
       * Returns the list of authors separated by commas with first names before
       * last name; first names are abbreviated or not depending on parameter. If
       * the list consists of three or more authors, "and" is inserted before the
       * last author's name.
       * <p>
       * <ul>
       * <li>"John Smith" ==> "John Smith" or "J. Smith"</li>
       * <li>"John Smith and Black Brown, Peter" ==> "John Smith and Peter Black
       * Brown" or "J. Smith and P. Black Brown"</li>
       * <li> "John von Neumann and John Smith and Black Brown, Peter" ==> "John
       * von Neumann, John Smith and Peter Black Brown" or "J. von Neumann, J.
       * Smith and P. Black Brown" </li>
       * </ul>
       * 
       * @param abbr
       *            whether to abbreivate first names.
       * 
       * @param oxfordComma
       *            Whether to put a comma before the and at the end.
       * 
       * @see http://en.wikipedia.org/wiki/Serial_comma For an detailed
       *      explaination about the Oxford comma.
       * 
       * @return formatted list of authors.
       */
00842       public String getAuthorsFirstFirst(boolean abbr, boolean oxfordComma) {

            int abbrInt = (abbr ? 0 : 1);
            abbrInt += (oxfordComma ? 0 : 2);

            // Check if we've computed this before:
            if (authorsFirstFirst[abbrInt] != null)
                  return authorsFirstFirst[abbrInt];

            StringBuffer res = new StringBuffer();
            if (size() > 0) {
                  res.append(getAuthor(0).getFirstLast(abbr));
                  int i = 1;
                  while (i < size() - 1) {
                        res.append(", ");
                        res.append(getAuthor(i).getFirstLast(abbr));
                        i++;
                  }
                  if (size() > 2 && oxfordComma)
                        res.append(",");
                  if (size() > 1) {
                        res.append(" and ");
                        res.append(getAuthor(i).getFirstLast(abbr));
                  }
            }
            authorsFirstFirst[abbrInt] = res.toString();
            return authorsFirstFirst[abbrInt];
      }
      
      /**
       * Compare this object with the given one. 
       * 
       * Will return true iff the other object is an Author and all fields are identical on a string comparison.
       */
00876       public boolean equals(Object o) {
            if (!(o instanceof AuthorList)) {
                  return false;
            }
            AuthorList a = (AuthorList) o;
            
            return this.authors.equals(a.authors);
      }
      
      /**
       * Returns the list of authors separated by "and"s with first names before
       * last name; first names are not abbreviated.
       * <p>
       * <ul>
       * <li>"John Smith" ==> "John Smith"</li>
       * <li>"John Smith and Black Brown, Peter" ==> "John Smith and Peter Black
       * Brown"</li>
       * <li>"John von Neumann and John Smith and Black Brown, Peter" ==> "John
       * von Neumann and John Smith and Peter Black Brown" </li>
       * </li>
       * 
       * @return formatted list of authors.
       */
00899       public String getAuthorsFirstFirstAnds() {
            // Check if we've computed this before:
            if (authorsFirstFirstAnds != null)
                  return authorsFirstFirstAnds;

            StringBuffer res = new StringBuffer();
            if (size() > 0) {
                  res.append(getAuthor(0).getFirstLast(false));
                  for (int i = 1; i < size(); i++) {
                        res.append(" and ");
                        res.append(getAuthor(i).getFirstLast(false));
                  }
            }
            authorsFirstFirstAnds = res.toString();
            return authorsFirstFirstAnds;
      }

      /**
       * Returns the list of authors in a form suitable for alphabetization. This
       * means that last names come first, never preceded by "von" particles, and
       * that any braces are removed. First names are abbreviated so the same name
       * is treated similarly if abbreviated in one case and not in another. This
       * form is not intended to be suitable for presentation, only for sorting.
       * 
       * <p>
       * <ul>
       * <li>"John Smith" ==> "Smith, J.";</li>
       * 
       * 
       * @return formatted list of authors
       */
00930       public String getAuthorsForAlphabetization() {
            if (authorsAlph != null)
                  return authorsAlph;

            StringBuffer res = new StringBuffer();
            if (size() > 0) {
                  res.append(getAuthor(0).getNameForAlphabetization());
                  for (int i = 1; i < size(); i++) {
                        res.append(" and ");
                        res.append(getAuthor(i).getNameForAlphabetization());
                  }
            }
            authorsAlph = res.toString();
            return authorsAlph;
      }

      /**
       * This is an immutable class that keeps information regarding single
       * author. It is just a container for the information, with very simple
       * methods to access it.
       * <p>
       * Current usage: only methods <code>getLastOnly</code>,
       * <code>getFirstLast</code>, and <code>getLastFirst</code> are used;
       * all other methods are provided for completeness.
       */
00955       public static class Author {
            
            private final String first_part;

            private final String first_abbr;

            private final String von_part;

            private final String last_part;

            private final String jr_part;

            /**
             * Compare this object with the given one. 
             * 
             * Will return true iff the other object is an Author and all fields are identical on a string comparison.
             */
00972             public boolean equals(Object o) {
                  if (!(o instanceof Author)) {
                        return false;
                  }
                  Author a = (Author) o;
                  return Util.equals(first_part, a.first_part)
                              && Util.equals(first_abbr, a.first_abbr)
                              && Util.equals(von_part, a.von_part)
                              && Util.equals(last_part, a.last_part)
                              && Util.equals(jr_part, a.jr_part);
            }
            
            /**
             * Creates the Author object. If any part of the name is absent, <CODE>null</CODE>
             * must be passes; otherwise other methods may return erroneous results.
             * 
             * @param first
             *            the first name of the author (may consist of several
             *            tokens, like "Charles Louis Xavier Joseph" in "Charles
             *            Louis Xavier Joseph de la Vall{\'e}e Poussin")
             * @param firstabbr
             *            the abbreviated first name of the author (may consist of
             *            several tokens, like "C. L. X. J." in "Charles Louis
             *            Xavier Joseph de la Vall{\'e}e Poussin"). It is a
             *            responsibility of the caller to create a reasonable
             *            abbreviation of the first name.
             * @param von
             *            the von part of the author's name (may consist of several
             *            tokens, like "de la" in "Charles Louis Xavier Joseph de la
             *            Vall{\'e}e Poussin")
             * @param last
             *            the lats name of the author (may consist of several
             *            tokens, like "Vall{\'e}e Poussin" in "Charles Louis Xavier
             *            Joseph de la Vall{\'e}e Poussin")
             * @param jr
             *            the junior part of the author's name (may consist of
             *            several tokens, like "Jr. III" in "Smith, Jr. III, John")
             */
01010             public Author(String first, String firstabbr, String von, String last, String jr) {
                  first_part = first;
                  first_abbr = firstabbr;
                  von_part = von;
                  last_part = last;
                  jr_part = jr;
            }

            /**
             * Returns the first name of the author stored in this object ("First").
             * 
             * @return first name of the author (may consist of several tokens)
             */
01023             public String getFirst() {
                  return first_part;
            }

            /**
             * Returns the abbreviated first name of the author stored in this
             * object ("F.").
             * 
             * @return abbreviated first name of the author (may consist of several
             *         tokens)
             */
01034             public String getFirstAbbr() {
                  return first_abbr;
            }

            /**
             * Returns the von part of the author's name stored in this object
             * ("von").
             * 
             * @return von part of the author's name (may consist of several tokens)
             */
01044             public String getVon() {
                  return von_part;
            }

            /**
             * Returns the last name of the author stored in this object ("Last").
             * 
             * @return last name of the author (may consist of several tokens)
             */
01053             public String getLast() {
            return last_part;
            }

            /**
             * Returns the junior part of the author's name stored in this object
             * ("Jr").
             * 
             * @return junior part of the author's name (may consist of several
             *         tokens) or null if the author does not have a Jr. Part
             */
01064             public String getJr() {
                  return jr_part;
            }

            /**
             * Returns von-part followed by last name ("von Last"). If both fields
             * were specified as <CODE>null</CODE>, the empty string <CODE>""</CODE>
             * is returned.
             * 
             * @return 'von Last'
             */
01075             public String getLastOnly() {
                  if (von_part == null) {
                        return (last_part == null ? "" : last_part);
                  } else {
                        return (last_part == null ? von_part : von_part + " " + last_part);
                  }
            }

            /**
             * Returns the author's name in form 'von Last, Jr., First' with the
             * first name full or abbreviated depending on parameter.
             * 
             * @param abbr
             *            <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> -
             *            do not abbreviate
             * @return 'von Last, Jr., First' (if <CODE>abbr==false</CODE>) or
             *         'von Last, Jr., F.' (if <CODE>abbr==true</CODE>)
             */
01093             public String getLastFirst(boolean abbr) {
                  String res = getLastOnly();
                  if (jr_part != null)
                        res += ", " + jr_part;
                  if (abbr) {
                        if (first_abbr != null)
                              res += ", " + first_abbr;
                  } else {
                        if (first_part != null)
                              res += ", " + first_part;
                  }
                  return res;
            }

            /**
             * Returns the author's name in form 'First von Last, Jr.' with the
             * first name full or abbreviated depending on parameter.
             * 
             * @param abbr
             *            <CODE>true</CODE> - abbreviate first name, <CODE>false</CODE> -
             *            do not abbreviate
             * @return 'First von Last, Jr.' (if <CODE>abbr==false</CODE>) or 'F.
             *         von Last, Jr.' (if <CODE>abbr==true</CODE>)
             */
01117             public String getFirstLast(boolean abbr) {
                  String res = getLastOnly();
                  if (abbr) {
                        res = (first_abbr == null ? "" : first_abbr + " ") + res;
                  } else {
                        res = (first_part == null ? "" : first_part + " ") + res;
                  }
                  if (jr_part != null)
                        res += ", " + jr_part;
                  return res;
            }

            /**
             * Returns the name as "Last, Jr, F." omitting the von-part and removing
             * starting braces.
             * 
             * @return "Last, Jr, F." as described above or "" if all these parts
             *         are empty.
             */
01136             public String getNameForAlphabetization() {
                  StringBuffer res = new StringBuffer();
                  if (last_part != null)
                        res.append(last_part);
                  if (jr_part != null) {
                        res.append(", ");
                        res.append(jr_part);
                  }
                  if (first_abbr != null) {
                        res.append(", ");
                        res.append(first_abbr);
                  }
                  while ((res.length() > 0) && (res.charAt(0) == '{'))
                        res.deleteCharAt(0);
                  return res.toString();
            }
      }// end Author


    public static void main(String[] args) {
        //String s = "Ford, Jr., Henry and Guy L. {Steele Jr.} and Olaf Nilsen, Jr.";
        String s = "Olaf von Nilsen, Jr.";
        AuthorList al = AuthorList.getAuthorList(s);
        for (int i=0; i<al.size(); i++) {
            Author a = al.getAuthor(i);
            System.out.println((i+1)+": first = '"+a.getFirst()+"'");
            System.out.println((i+1)+": last = '"+a.getLast()+"'");
            System.out.println((i+1)+": jr = '"+a.getJr()+"'");
            System.out.println((i+1)+": von = '"+a.getVon()+"'");
        }

        System.out.println((new CreateDocBookAuthors()).format(s));
    }
}// end AuthorList

Generated by  Doxygen 1.6.0   Back to index