// Logo Search packages:
// Sourcecode: jabref version File versions  Download package
//
// IEEEXploreFetcher.java

package net.sf.jabref.imports;

import java.awt.BorderLayout;

import java.io.BufferedReader;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;

import java.net.ConnectException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.ButtonGroup;
import javax.swing.JCheckBox;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JRadioButton;

import net.sf.jabref.BibtexDatabase;
import net.sf.jabref.BibtexEntry;
import net.sf.jabref.BibtexEntryType;
import net.sf.jabref.GUIGlobals;
import net.sf.jabref.Globals;
import net.sf.jabref.OutputPrinter;
import net.sf.jabref.Util;

public class IEEEXploreFetcher implements EntryFetcher {

    // Inspector dialog that receives parsed entries; assigned at the start of every processQuery call.
    ImportInspector dialog = null;
      // Output channel used to show messages to the user; assigned in processQuery.
      OutputPrinter status;
    // Applied to the text captured by the field patterns in parseNextEntry.
    final HTMLConverter htmlConverter = new HTMLConverter();
    
    // Option widgets shown by getOptionsPanel. The check box is read in processQuery
    // (-> includeAbstract); bibButton's selection is read into importBibtex.
    // getOptionsPanel disables both radio buttons and preselects htmlButton.
    private JCheckBox absCheckBox = new JCheckBox(Globals.lang("Include abstracts"), false);
    private JRadioButton htmlButton = new JRadioButton(Globals.lang("HTML parser"));
    private JRadioButton bibButton = new JRadioButton(Globals.lang("BibTeX importer"));
    
    // Upper bound on the number of hits that processQuery will download.
    private static final int MAX_FETCH = 100;
    // perPage: rows requested per result page (baked into endUrl below).
    // hits: number of results reported by the site, capped at MAX_FETCH.
    // unparseable: entries whose type could not be detected; parsed: entries handed to the dialog.
    private int perPage = MAX_FETCH, hits = 0, unparseable = 0, parsed = 0;
    // Scan offset into the current result page; advanced by parseNextEntry / parseNextEntryId.
    private int piv = 0;
    // Set to true by processQuery and to false by stopFetching; checked inside the fetch loops.
    private boolean shouldContinue = false;
    private boolean includeAbstract = false;
    private boolean importBibtex = false;
    
    // Raw query string of the current search.
    private String terms;
    private final String startUrl = "http://ieeexplore.ieee.org/search/freesearchresult.jsp?queryText=";
    // Evaluated once at construction time, so it captures the initial value of perPage.
    private final String endUrl = "&rowsPerPage=" + Integer.toString(perPage) + "&pageNumber=";
    // URL of the most recently requested result page; also sent as the Referer header
    // by parseBibtexDatabase.
    private String searchUrl;
    // Target of the POST request issued by parseBibtexDatabase.
    private final String importUrl = "http://ieeexplore.ieee.org/xpls/downloadCitations";
    
    // Group 1 is the hit count; note that the character class also admits commas.
    private final Pattern hitsPattern = Pattern.compile("([0-9,]+) results");
    // Group 1 is the numeric id attribute of a result's checkbox (used by parseNextEntryId).
    private final Pattern idPattern = Pattern.compile("<input name=\"\" type=\"checkbox\" value=\"\"\\s*" +
            "id=\"([0-9]+)\"/>");
    // Group 1 is the publication type label (e.g. "IEEE Journals") tested in parseNextEntry.
    private final Pattern typePattern = Pattern.compile("<span class=\"type\">\\s*(.+)");
    // BibTeX field name -> regex source; filled by the constructor and compiled on use in parseNextEntry.
    private HashMap<String, String> fieldPatterns = new HashMap<String, String>();
    // Group 1 is the abstract text inside an abstract block (used by parseNextEntry).
    private final Pattern absPattern = Pattern.compile("<p>\\s*(.+)");
    
    // NOTE(review): not referenced anywhere else in this file; package-private, so confirm
    // there are no external users before removing.
    Pattern stdEntryPattern = Pattern.compile(".*<strong>(.+)</strong><br>"
                  + "\\s+(.+)");
    
    // Used by cleanup to split a publication name at its last ", " and to rearrange
    // names containing "Proceedings".
    Pattern publicationPattern = Pattern.compile("(.*), \\d*\\.*\\s?(.*)");
    Pattern proceedingPattern = Pattern.compile("(.*?)\\.?\\s?Proceedings\\s?(.*)");
    // NOTE(review): not referenced anywhere else in this file.
    Pattern abstractLinkPattern = Pattern.compile(
            "<a href=\"(.+)\" class=\"bodyCopySpaced\">Abstract</a>");
    // Regex source used with String.matches in cleanup to decide which part of a
    // publication name is treated as the abbreviation.
    String abrvPattern = ".*[^,] '?\\d+\\)?";

    // NOTE(review): not referenced anywhere else in this file.
    Pattern ieeeArticleNumberPattern = Pattern.compile("<a href=\".*arnumber=(\\d+).*\">");
    
    public IEEEXploreFetcher() {
      super();
      
      fieldPatterns.put("title", "<a\\s*href=[^<]+>\\s*(.+)\\s*</a>");
        fieldPatterns.put("author", "<p>\\s+(.+)");
        fieldPatterns.put("volume", "Volume:\\s*(\\d+)");
        fieldPatterns.put("number", "Issue:\\s*(\\d+)");
        //fieldPatterns.put("part", "Part (\\d+),&nbsp;(.+)");
        fieldPatterns.put("year", "Publication Year:\\s*(\\d{4})");
        fieldPatterns.put("pages", "Page\\(s\\):\\s*(\\d+)\\s*-\\s*(\\d*)");
        fieldPatterns.put("doi", "Digital Object Identifier:\\s*<a href=.*>(.+)</a>");
    }
    /**
     * Builds the options panel for this fetcher: the "include abstracts" check
     * box on top and the two parser radio buttons below it.
     *
     * @return a newly created panel containing the option widgets
     */
    public JPanel getOptionsPanel() {
        // The HTML parser is preselected and both radio buttons are disabled,
        // so the user cannot switch parsers from this panel.
        htmlButton.setSelected(true);
        htmlButton.setEnabled(false);
        bibButton.setEnabled(false);

        ButtonGroup parserChoice = new ButtonGroup();
        parserChoice.add(htmlButton);
        parserChoice.add(bibButton);

        JPanel optionsPanel = new JPanel(new BorderLayout());
        optionsPanel.add(absCheckBox, BorderLayout.NORTH);
        optionsPanel.add(htmlButton, BorderLayout.CENTER);
        optionsPanel.add(bibButton, BorderLayout.EAST);
        return optionsPanel;
    }

    /**
     * Runs a search against IEEEXplore and feeds the parsed entries to the
     * given inspector dialog.
     *
     * The first result page is downloaded and checked for the site's error
     * texts; then the hit count is read, capped at MAX_FETCH, and the result
     * pages are parsed one after another until all hits are covered or
     * stopFetching() has been called.
     *
     * @param query  the search terms as entered by the user
     * @param dialog the dialog that receives entries and progress updates
     * @param status used to show informational and error messages
     * @return true if the search ran through, false if the site reported a
     *         problem, nothing was found, or an I/O error occurred
     */
    public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) {
        this.dialog = dialog;
        this.status = status;
        this.terms = query;
        this.piv = 0;
        this.shouldContinue = true;
        this.parsed = 0;
        this.unparseable = 0;

        int currentPage = 1;
        searchUrl = makeUrl(currentPage); // result pages are numbered from 1

        try {
            String html = getResults(new URL(searchUrl));

            if (html.contains("You have entered an invalid search")) {
                status.showMessage(
                        Globals.lang("You have entered an invalid search '%0'.", terms),
                        Globals.lang("Search IEEEXplore"), JOptionPane.INFORMATION_MESSAGE);
                return false;
            }

            if (html.contains("Bad request")) {
                status.showMessage(
                        Globals.lang("Bad Request '%0'.", terms),
                        Globals.lang("Search IEEEXplore"), JOptionPane.INFORMATION_MESSAGE);
                return false;
            }

            if (html.contains("No results were found.")) {
                status.showMessage(
                        Globals.lang("No entries found for the search string '%0'", terms),
                        Globals.lang("Search IEEEXplore"), JOptionPane.INFORMATION_MESSAGE);
                return false;
            }

            hits = getNumberOfHits(html, "display-status", hitsPattern);

            // Snapshot the user's options once per query.
            includeAbstract = absCheckBox.isSelected();
            importBibtex = bibButton.isSelected();

            if (hits > MAX_FETCH) {
                String[] counts = {String.valueOf(hits), String.valueOf(MAX_FETCH)};
                status.showMessage(
                        Globals.lang("%0 entries found. To reduce server load, "
                                + "only %1 will be downloaded.", counts),
                        Globals.lang("Search IEEEXplore"), JOptionPane.INFORMATION_MESSAGE);
                hits = MAX_FETCH;
            }

            parse(dialog, html, 0, 1);

            // Fetch follow-up pages while there are hits beyond those already covered.
            for (int firstEntry = perPage; shouldContinue && firstEntry < hits; firstEntry += perPage) {
                currentPage++;
                searchUrl = makeUrl(currentPage);
                html = getResults(new URL(searchUrl));

                // The download may take a while; re-check for cancellation before parsing.
                if (!shouldContinue) {
                    break;
                }

                parse(dialog, html, 0, firstEntry + 1);
            }
            return true;
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (ConnectException e) {
            status.showMessage(Globals.lang("Connection to IEEEXplore failed"),
                    Globals.lang("Search IEEEXplore"), JOptionPane.ERROR_MESSAGE);
        } catch (IOException e) {
            status.showMessage(Globals.lang(e.getMessage()),
                    Globals.lang("Search IEEEXplore"), JOptionPane.ERROR_MESSAGE);
            e.printStackTrace();
        }
        return false;
    }

    /**
     * @return the menu title for this fetcher, as produced by
     *         Globals.menuTitle for the key "Search IEEEXplore"
     */
    public String getTitle() {
        final String menuKey = "Search IEEEXplore";
        return Globals.menuTitle(menuKey);
    }

    /**
     * @return the URL that GUIGlobals associates with the icon key "www"
     */
    public URL getIcon() {
        final URL iconLocation = GUIGlobals.getIconUrl("www");
        return iconLocation;
    }

    /**
     * @return the file name of the help page for this fetcher
     */
    public String getHelpPage() {
        final String helpFile = "IEEEXploreHelp.html";
        return helpFile;
    }

    /**
     * @return the fixed (untranslated) key name of this fetcher
     */
    public String getKeyName() {
        final String keyName = "Search IEEEXplore";
        return keyName;
    }

    /**
     * Requests cancellation of a running fetch. Called by the dialog when the
     * user has cancelled the import; the fetch loops check the flag cleared
     * here and stop at their next check.
     */
    public void stopFetching() {
        this.shouldContinue = false;
    }

    /**
     * Builds the search URL for one result page of the current query.
     *
     * The query terms are URL-encoded so that characters with a special
     * meaning in a query string (for example '&amp;', '#', '%', quotes) or
     * non-ASCII letters cannot corrupt the request. Spaces are still sent
     * as '+', as before.
     *
     * @param startIndex despite its name, the page number that is appended
     *                   after "&amp;pageNumber=" (processQuery starts at 1)
     * @return the complete search URL
     */
    private String makeUrl(int startIndex) {
        String encodedTerms;
        try {
            encodedTerms = URLEncoder.encode(terms, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            // Every JVM is required to support UTF-8, so this should not happen;
            // fall back to the plain space substitution rather than failing.
            encodedTerms = terms.replaceAll(" ", "+");
        }
        StringBuilder sb = new StringBuilder(startUrl);
        sb.append(encodedTerms);
        sb.append(endUrl);
        sb.append(startIndex);
        return sb.toString();
    }

    

    /**
     * Parses one result page and hands every entry found to the dialog.
     *
     * Depending on importBibtex, either the record ids on the page are
     * collected and downloaded as BibTeX in one request, or each result is
     * parsed directly from the HTML.
     *
     * @param dialog           receives the entries and progress updates
     * @param text             HTML of the result page
     * @param startIndex       offset in text at which scanning starts
     * @param firstEntryNumber number of the first entry on this page
     *                         (currently not used by the parsing itself)
     */
    private void parse(ImportInspector dialog, String text, int startIndex, int firstEntryNumber) {
        piv = startIndex;

        if (importBibtex) {
            //TODO: Login
            List<String> idSelected = new ArrayList<String>();
            String id;
            // parseNextEntryId advances piv past each block it has looked at.
            while ((id = parseNextEntryId(text, piv)) != null && shouldContinue) {
                idSelected.add(id);
            }
            try {
                BibtexDatabase dbase = parseBibtexDatabase(idSelected, includeAbstract);
                if (dbase == null) {
                    // No ids on this page (or the import URL was rejected): nothing to add.
                    return;
                }
                for (BibtexEntry entry : dbase.getEntries()) {
                    dialog.addEntry(cleanup(entry));
                    // Count the entry first so the reported progress includes it.
                    parsed++;
                    dialog.setProgress(parsed + unparseable, hits);
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        } else {
            BibtexEntry entry;
            // parseNextEntry advances piv; it returns null when no further result is found.
            while (((entry = parseNextEntry(text, piv)) != null) && shouldContinue) {
                // Results without a title are silently skipped.
                if (entry.getField("title") != null) {
                    dialog.addEntry(entry);
                    parsed++;
                    dialog.setProgress(parsed + unparseable, hits);
                }
            }
        }
    }

    /**
     * Downloads the BibTeX export for the given record ids and parses it.
     *
     * The ids are POSTed to importUrl as a form, with the last search URL as
     * Referer, and the response is handed to BibtexParser.
     *
     * @param id  record ids to download
     * @param abs true to request citations including abstracts
     * @return the parsed database, or null if id is empty or importUrl is malformed
     * @throws IOException if the connection, the request or the parsing fails
     */
    private BibtexDatabase parseBibtexDatabase(List<String> id, boolean abs) throws IOException {
        if (id.isEmpty()) {
            return null;
        }
        URLConnection conn;
        try {
            conn = new URL(importUrl).openConnection();
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return null;
        }
        conn.setDoInput(true);
        conn.setDoOutput(true);
        conn.setRequestProperty("Content-Type",
                "application/x-www-form-urlencoded");
        conn.setRequestProperty("Referer", searchUrl);

        // The ids are sent as one value, separated by URL-encoded spaces.
        StringBuilder recordIds = new StringBuilder();
        for (String recordId : id) {
            if (recordIds.length() > 0) {
                recordIds.append("%20");
            }
            recordIds.append(recordId);
        }
        String citation = abs ? "citation-abstract" : "citation-only";
        String content = "recordIds=" + recordIds + "&fromPageName=&citations-format=" + citation
                + "&download-format=download-bibtex";

        PrintWriter out = new PrintWriter(conn.getOutputStream());
        try {
            out.write(content);
            out.flush();
        } finally {
            out.close();
        }

        // The reader goes to the parser untouched. Draining it first (as an earlier
        // debug print did) would leave the parser with an empty stream.
        BufferedReader bufr = new BufferedReader(new InputStreamReader(conn.getInputStream()));
        try {
            ParserResult results = new BibtexParser(bufr).parse();
            return results.getDatabase();
        } finally {
            bufr.close();
        }
    }

    /**
     * Splits a lower-cased, dot-free month value into up to two
     * (digits, letters) pairs separated by optional dashes:
     * group 1/2 = first digits/letters, group 3/4 = second digits/letters.
     */
    private static final Pattern MONTH_PATTERN =
            Pattern.compile("(\\d*+)\\s*([a-z]*+)-*(\\d*+)\\s*([a-z]*+)");

    /**
     * Normalizes the author, month, pages and publication-name fields of an
     * entry in place.
     *
     * @param entry the entry to clean up; may be null
     * @return the same entry instance, or null if entry was null
     */
    private BibtexEntry cleanup(BibtexEntry entry) {
        if (entry == null) {
            return null;
        }
        // Order matters: the publication step may clear month and pages again.
        cleanupAuthor(entry);
        cleanupMonth(entry);
        cleanupPages(entry);
        cleanupPublication(entry);
        return entry;
    }

    /** Puts a space after initials, turns "; " separators into " and " and drops a trailing ',' or ';'. */
    private void cleanupAuthor(BibtexEntry entry) {
        String author = (String) entry.getField("author");
        if (author == null) {
            return;
        }
        author = author.replaceAll("\\.", ". ");
        author = author.replaceAll("  ", " ");
        author = author.replaceAll("\\. -", ".-");
        author = author.replaceAll("; ", " and ");
        author = author.replaceAll("[,;]$", "");
        entry.setField("author", author);
    }

    /** Returns the first three characters of a month name, or the whole name if it is shorter. */
    private static String monthKey(String monthName) {
        return monthName.length() > 3 ? monthName.substring(0, 3) : monthName;
    }

    /** Rewrites the month field into "#mon#" string references with optional day or day range. */
    private void cleanupMonth(BibtexEntry entry) {
        String month = (String) entry.getField("month");
        if ((month == null) || (month.length() == 0)) {
            return;
        }
        month = month.replaceAll("\\.", "");
        month = month.toLowerCase();

        String date = month;
        Matcher mm = MONTH_PATTERN.matcher(month);
        if (mm.find()) {
            String firstDay = mm.group(1);
            String firstMonth = mm.group(2);
            String secondDay = mm.group(3);
            String secondMonth = mm.group(4);

            if (secondDay.length() == 0) {
                // Single date: "<day> <month>", "<month>" or just digits.
                if (firstMonth.length() > 0) {
                    date = "#" + monthKey(firstMonth) + "#";
                    if (firstDay.length() > 0) {
                        date += " " + firstDay + ",";
                    }
                } else {
                    date = firstDay + ",";
                }
            } else if (firstMonth.length() == 0) {
                // Day range within one month: "<day>-<day> <month>".
                if (secondMonth.length() > 0) {
                    date = "#" + monthKey(secondMonth) + "# " + firstDay + "--" + secondDay + ",";
                } else {
                    date += ",";
                }
            } else if (secondMonth.length() == 0) {
                // "<day> <month>-<day>": the month is given only once, in front.
                // (Previously this case ran into substring() on an empty string.)
                date = "#" + monthKey(firstMonth) + "# " + firstDay + "--" + secondDay + ",";
            } else {
                // Range spanning two months: "<day> <month>-<day> <month>".
                date = "#" + monthKey(firstMonth) + "# " + firstDay
                        + "--#" + monthKey(secondMonth) + "# " + secondDay + ",";
            }
        }
        entry.setField("month", date);
    }

    /** Collapses "n-n" to a single page and turns "a-b" into the BibTeX form "a--b". */
    private void cleanupPages(BibtexEntry entry) {
        String field = "pages";
        String pages = entry.getField(field);
        if (pages == null) {
            return;
        }
        String[] pageNumbers = pages.split("-");
        if (pageNumbers.length == 2) {
            if (pageNumbers[0].equals(pageNumbers[1])) { // single page
                entry.setField(field, pageNumbers[0]);
            } else {
                entry.setField(field, pages.replaceAll("-", "--"));
            }
        }
    }

    /**
     * Normalizes the journal (for articles) or booktitle (for inproceedings)
     * name: strips bracketed remarks, abbreviates common words, reorders the
     * parts around an abbreviation and, for proceedings, prefixes "Proc. ".
     */
    private void cleanupPublication(BibtexEntry entry) {
        BibtexEntryType type = entry.getType();
        String typeName = type.getName();
        // Compare by value; reference comparison only works for interned strings.
        boolean isArticle = "Article".equals(typeName);
        boolean isInproceedings = "Inproceedings".equals(typeName);

        String sourceField = "";
        if (isArticle) {
            sourceField = "journal";
            entry.clearField("booktitle");
        } else if (isInproceedings) {
            sourceField = "booktitle";
        }

        String fullName = entry.getField(sourceField);
        if (fullName == null) {
            return;
        }

        if (isArticle) {
            int ind = fullName.indexOf(": Accepted for future publication");
            if (ind > 0) {
                fullName = fullName.substring(0, ind);
                entry.setField("year", "to be published");
                entry.clearField("month");
                entry.clearField("pages");
            }
            String[] bracketParts = fullName.split("[\\[\\]]"); //[see also...], [legacy...]
            // split() yields an empty array when the name consists of brackets only.
            if (bracketParts.length > 0) {
                fullName = bracketParts[0];
                if (bracketParts.length == 3) {
                    fullName += bracketParts[2];
                }
            }
        } else {
            fullName = fullName.replace("Conference Proceedings", "Proceedings").
                    replace("Proceedings of", "Proceedings").replace("Proceedings.", "Proceedings");
            fullName = fullName.replaceAll("International", "Int.");
            fullName = fullName.replaceAll("Symposium", "Symp.");
            fullName = fullName.replaceAll("Conference", "Conf.");
            // Drop the word "on" only; the word boundary keeps words that merely
            // start with "on" intact.
            fullName = fullName.replaceAll(" on\\b", " ").replace("  ", " ");
        }

        Matcher m1 = publicationPattern.matcher(fullName);
        if (m1.find()) {
            String prefix = m1.group(2).trim();
            String postfix = m1.group(1).trim();
            String abrv = "";
            String[] parts = prefix.split("\\. ", 2);
            if (parts.length == 2) {
                if (parts[0].matches(abrvPattern)) {
                    prefix = parts[1];
                    abrv = parts[0];
                } else {
                    prefix = parts[0];
                    abrv = parts[1];
                }
            }
            if (!prefix.matches(abrvPattern)) {
                fullName = prefix + " " + postfix + " " + abrv;
                fullName = fullName.trim();
            } else {
                fullName = postfix + " " + prefix;
            }
        }

        if (isArticle) {
            fullName = fullName.replace("- ", "-"); //IEE Proceedings-

            fullName = fullName.trim();
            if (Globals.prefs.getBoolean("useIEEEAbrv")) {
                String id = Globals.journalAbbrev.getAbbreviatedName(fullName, false);
                if (id != null) {
                    fullName = id;
                }
            }
        }

        if (isInproceedings) {
            Matcher m2 = proceedingPattern.matcher(fullName);
            if (m2.find()) {
                String prefix = m2.group(2);
                String postfix = m2.group(1).replaceAll("\\.$", "");
                if (!prefix.matches(abrvPattern)) {
                    String abrv = "";

                    String[] parts = postfix.split("\\. ", 2);
                    if (parts.length == 2) {
                        if (parts[0].matches(abrvPattern)) {
                            postfix = parts[1];
                            abrv = parts[0];
                        } else {
                            postfix = parts[0];
                            abrv = parts[1];
                        }
                    }
                    fullName = prefix.trim() + " " + postfix.trim() + " " + abrv;
                } else {
                    fullName = postfix.trim() + " " + prefix.trim();
                }
            }

            fullName = fullName.trim();

            fullName = fullName.replaceAll("^[tT]he ", "").replaceAll("^\\d{4} ", "").replaceAll("[,.]$", "");
            String year = entry.getField("year");
            if (year != null) {
                // Quote the year so that it is matched literally, whatever it contains.
                fullName = fullName.replaceAll(Pattern.quote(", " + year) + "\\.?", "");
            }

            if (!fullName.contains("Abstract") && !fullName.contains("Summaries")
                    && !fullName.contains("Conference Record")) {
                fullName = "Proc. " + fullName;
            }
        }
        entry.setField(sourceField, fullName);
    }

    /**
     * Finds the next result checkbox block at or after startIndex and
     * extracts its record id. When a block is found, piv is moved just past
     * its closing tag, whether or not an id could be extracted from it.
     *
     * @param allText    HTML of the result page
     * @param startIndex offset at which the search starts
     * @return the record id, or null if no block was found or it carries no id
     */
    private String parseNextEntryId(String allText, int startIndex) {
        final String closingTag = "</div>";
        int blockStart = allText.indexOf("<div class=\"select", startIndex);
        int blockEnd = allText.indexOf(closingTag, blockStart);

        if (blockStart < 0 || blockEnd <= 0) {
            return null;
        }

        String block = allText.substring(blockStart, blockEnd);
        piv = blockEnd + closingTag.length();

        Matcher idMatcher = idPattern.matcher(block);
        return idMatcher.find() ? idMatcher.group(1) : null;
    }
    
    /**
     * Parses the next search result at or after startIndex from the HTML of
     * a result page into a BibTeX entry and advances piv past the consumed
     * markup.
     *
     * The entry type is derived from the result's type label; unknown labels
     * fall back to "misc" and are counted as unparseable. Fields are filled
     * from fieldPatterns, the abstract is added if requested, and the entry
     * is finally passed through cleanup.
     *
     * @param allText    HTML of the result page
     * @param startIndex offset at which the search for the next result starts
     * @return the cleaned-up entry, or null if no further result was found
     */
    private BibtexEntry parseNextEntry(String allText, int startIndex) {
        BibtexEntry entry = null;

        int index = allText.indexOf("<div class=\"detail", startIndex);
        int endIndex = allText.indexOf("</div>", index);

        if (index >= 0 && endIndex > 0) {
            endIndex += 6;
            piv = endIndex;
            String text = allText.substring(index, endIndex);

            BibtexEntryType type = null;
            // Field that receives the second title-like match (journal, booktitle, ...).
            String sourceField = null;

            String typeName = "";
            Matcher typeMatcher = typePattern.matcher(text);
            if (typeMatcher.find()) {
                typeName = typeMatcher.group(1);
                if (typeName.equalsIgnoreCase("IEEE Journals") || typeName.equalsIgnoreCase("IEEE Early Access") ||
                        typeName.equalsIgnoreCase("IET Journals") || typeName.equalsIgnoreCase("AIP Journals") ||
                        typeName.equalsIgnoreCase("AVS Journals") || typeName.equalsIgnoreCase("IBM Journals")) {
                    type = BibtexEntryType.getType("article");
                    sourceField = "journal";
                } else if (typeName.equalsIgnoreCase("IEEE Conferences") || typeName.equalsIgnoreCase("IET Conferences")) {
                    type = BibtexEntryType.getType("inproceedings");
                    sourceField = "booktitle";
                } else if (typeName.equalsIgnoreCase("IEEE Standards")) {
                    type = BibtexEntryType.getType("standard");
                    sourceField = "number";
                } else if (typeName.equalsIgnoreCase("IEEE Educational Courses")) {
                    type = BibtexEntryType.getType("Electronic");
                    sourceField = "note";
                } else if (typeName.equalsIgnoreCase("IEEE Book Chapter")) {
                    type = BibtexEntryType.getType("inCollection");
                    sourceField = "booktitle";
                }
            }

            if (type == null) {
                type = BibtexEntryType.getType("misc");
                sourceField = "note";
                System.err.println("Type detection failed. Use MISC instead.");
                unparseable++;
                System.err.println(text);
            }

            entry = new BibtexEntry(Util.createNeutralId(), type);

            if (typeName.equalsIgnoreCase("IEEE Standards")) {
                entry.setField("organization", "IEEE");
            }

            if (typeName.equalsIgnoreCase("IEEE Book Chapter")) {
                entry.setField("publisher", "IEEE");
            }

            if (typeName.equalsIgnoreCase("IEEE Early Access")) {
                entry.setField("note", "Early Access");
            }

            Set<String> fields = fieldPatterns.keySet();
            for (String field : fields) {
                Matcher fieldMatcher = Pattern.compile(fieldPatterns.get(field)).matcher(text);
                if (fieldMatcher.find()) {
                    entry.setField(field, htmlConverter.format(fieldMatcher.group(1)));
                    // A second match of the title pattern is the publication the
                    // result appeared in; store it in the type-specific source field.
                    if (field.equals("title") && fieldMatcher.find()) {
                        String sec_title = htmlConverter.format(fieldMatcher.group(1));
                        if (entry.getType() == BibtexEntryType.getStandardType("standard")) {
                            sec_title = sec_title.replaceAll("IEEE Std ", "");
                        }
                        entry.setField(sourceField, sec_title);
                    }
                    // Only build a range when an end page was actually captured;
                    // otherwise keep the single start page set above instead of "12-".
                    if (field.equals("pages") && fieldMatcher.groupCount() == 2
                            && fieldMatcher.group(2).length() > 0) {
                        entry.setField(field, fieldMatcher.group(1) + "-" + fieldMatcher.group(2));
                    }
                }
            }

            // A conference result without authors is treated as the proceedings volume
            // itself. The author field is null when the author pattern did not match.
            String author = entry.getField("author");
            if (entry.getType() == BibtexEntryType.getStandardType("inproceedings")
                    && (author == null || author.equals(""))) {
                entry.setType(BibtexEntryType.getStandardType("proceedings"));
            }

            if (includeAbstract) {
                index = allText.indexOf("<div class=\"abstract RevealContent", piv);
                if (index >= 0) {
                    int abstractEnd = allText.indexOf("</div>", index);
                    // Skip the abstract if its block is not terminated, rather than
                    // computing a bogus end offset.
                    if (abstractEnd >= 0) {
                        endIndex = abstractEnd + 6;
                        piv = endIndex;

                        text = allText.substring(index, endIndex);
                        Matcher absMatcher = absPattern.matcher(text);
                        if (absMatcher.find()) {
                            entry.setField("abstract", absMatcher.group(1));
                        }
                    }
                }
            }
        }

        return (entry == null) ? null : cleanup(entry);
    }

    /**
     * Finds out how many hits were found.
     *
     * The count is searched for with the given pattern in the part of the
     * page that starts at the marker. Thousands separators in the matched
     * number (e.g. "1,234") are removed before it is converted.
     *
     * @param page    HTML of the first result page
     * @param marker  text that precedes the hit count in the page
     * @param pattern pattern whose group 1 captures the hit count
     * @return the number of hits reported by the page
     * @throws IOException if the marker or the count cannot be found, or the
     *                     matched text is not a valid integer
     */
    private int getNumberOfHits(String page, String marker, Pattern pattern) throws IOException {
        int ind = page.indexOf(marker);
        if (ind < 0) {
            System.out.println(page);
            throw new IOException(Globals.lang("Could not parse number of hits"));
        }
        Matcher m = pattern.matcher(page.substring(ind));
        if (!m.find()) {
            throw new IOException(Globals.lang("Could not parse number of hits"));
        }
        // The pattern admits commas, which Integer.parseInt does not accept.
        String digits = m.group(1).replace(",", "");
        try {
            return Integer.parseInt(digits);
        } catch (NumberFormatException e) {
            // E.g. only commas matched, or the number does not fit into an int.
            throw new IOException(Globals.lang("Could not parse number of hits"));
        }
    }

    /**
     * Download the URL and return contents as a String.
     *
     * Each byte of the response becomes one character with the same value
     * (0-255). The stream is closed before the method returns.
     *
     * @param source the URL to download
     * @return the complete response body
     * @throws IOException if the URL cannot be opened or read
     */
    public String getResults(URL source) throws IOException {
        return readFully(source.openStream());
    }

    /**
     * Read results from a file instead of an URL. Just for faster debugging.
     *
     * The bytes are converted in the same way as in getResults, and the file
     * is closed before the method returns.
     *
     * @param f the file to read
     * @return the complete file contents
     * @throws IOException if the file cannot be opened or read
     */
    public String getResultsFromFile(File f) throws IOException {
        return readFully(new BufferedInputStream(new FileInputStream(f)));
    }

    /**
     * Reads the stream to its end, mapping every byte to the character with
     * the same unsigned value, and always closes the stream.
     */
    private static String readFully(InputStream in) throws IOException {
        try {
            StringBuilder sb = new StringBuilder();
            byte[] buffer = new byte[4096];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                for (int i = 0; i < bytesRead; i++) {
                    // Mask before widening: a plain (char) cast sign-extends bytes
                    // >= 0x80 and would produce unrelated characters.
                    sb.append((char) (buffer[i] & 0xFF));
                }
            }
            return sb.toString();
        } finally {
            in.close();
        }
    }
}

// Generated by  Doxygen 1.6.0   Back to index