Logo Search packages:      
Sourcecode: jabref version File versions  Download package

ACMPortalFetcher.java

/* Aaron Chen
 * 08-28-2007
 * ACM Portal support
 */

package net.sf.jabref.imports;

import java.awt.GridLayout;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.ConnectException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.swing.ButtonGroup;
import javax.swing.JCheckBox;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JRadioButton;

import net.sf.jabref.BibtexEntry;
import net.sf.jabref.GUIGlobals;
import net.sf.jabref.Globals;
import net.sf.jabref.OutputPrinter;

public class ACMPortalFetcher implements EntryFetcher {

      ImportInspector dialog = null;
      OutputPrinter status;
    final HTMLConverter htmlConverter = new HTMLConverter();
    private String terms;
    String startUrl = "http://portal.acm.org/";
    String searchUrlPart = "results.cfm?query=";
    String searchUrlPartII = "&dl=";
    String endUrl = "&coll=Portal&short=0";//&start=";

    private JRadioButton acmButton = new JRadioButton(Globals.lang("The ACM Digital Library"));
    private JRadioButton guideButton = new JRadioButton(Globals.lang("The Guide to Computing Literature"));
    private JCheckBox absCheckBox = new JCheckBox(Globals.lang("Include abstracts"), false);
    
    private static final int MAX_FETCH = 20; // 20 when short=0
    private int perPage = MAX_FETCH, hits = 0, unparseable = 0, parsed = 0;
    private boolean shouldContinue = false;
    private boolean fetchAbstract = false;
    private boolean acmOrGuide = false;

    Pattern hitsPattern = Pattern.compile(".*Found <b>(\\d+,*\\d*)</b> of.*");
    Pattern maxHitsPattern = Pattern.compile(".*Results \\d+ - \\d+ of (\\d+,*\\d*).*");
    Pattern bibPattern = Pattern.compile(".*(popBibTex.cfm.*)','BibTex'.*");
    Pattern absPattern = Pattern.compile(".*ABSTRACT</A></span>\\s+<p class=\"abstract\">\\s+(.*)");
    
    Pattern fullCitationPattern =
        Pattern.compile("<A HREF=\"(citation.cfm.*)\" class.*");

    public JPanel getOptionsPanel() {
        JPanel pan = new JPanel();
        pan.setLayout(new GridLayout(0,1));

        guideButton.setSelected(true);
        
        ButtonGroup group = new ButtonGroup();
        group.add(acmButton);
        group.add(guideButton);
        
        pan.add(absCheckBox);
        pan.add(acmButton);
        pan.add(guideButton);
        
        return pan;
    }

    public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) {
        this.dialog = dialog;
        this.status = status;
        this.terms = query;
        piv = 0;
        shouldContinue = true;
        parsed = 0;
        unparseable = 0;
        acmOrGuide = acmButton.isSelected();
        String address = makeUrl(0);
        try {
            URL url = new URL(address);

            //dialog.setVisible(true);
            String page = getResults(url);
            //System.out.println(address);
            hits = getNumberOfHits(page, "Found", hitsPattern);
                  int index = page.indexOf("Found");
                  if (index >= 0) {
                  page = page.substring(index + 5);
                        index = page.indexOf("Found");
                        if (index >= 0)
                        page = page.substring(index);
                  }
            //System.out.println(page);
            //System.out.printf("Hit %d\n", hits);
            
            if (hits == 0) {
                status.showMessage(Globals.lang("No entries found for the search string '%0'",
                        terms),
                        Globals.lang("Search ACM Portal"), JOptionPane.INFORMATION_MESSAGE);
                return false;
            }

            int maxHits = getNumberOfHits(page, "Results", maxHitsPattern);
            //System.out.printf("maxHit %d\n", maxHits);
            //String page = getResultsFromFile(new File("/home/alver/div/temp50.txt"));

            //List entries = new ArrayList();
            //System.out.println("Number of hits: "+hits);
            //System.out.println("Maximum returned: "+maxHits);
            if (hits > maxHits)
                hits = maxHits;
            
            if (hits > MAX_FETCH) {
                status.showMessage(Globals.lang("%0 entries found. To reduce server load, "
                        +"only %1 will be downloaded. It will be very slow, in order to make ACM happy.",
                                new String[] {String.valueOf(hits), String.valueOf(MAX_FETCH)}),
                        Globals.lang("Search ACM Portal"), JOptionPane.INFORMATION_MESSAGE);
                hits = MAX_FETCH;
            }
        
            fetchAbstract = absCheckBox.isSelected();
            //parse(dialog, page, 0, 51);
            //dialog.setProgress(perPage/2, hits);
            parse(dialog, page, 0, 1);
            //System.out.println(page);
            int firstEntry = perPage;
            while (shouldContinue && (firstEntry < hits)) {
                //System.out.println("Fetching from: "+firstEntry);
                address = makeUrl(firstEntry);
                //System.out.println(address);
                page = getResults(new URL(address));
                
                //dialog.setProgress(firstEntry+perPage/2, hits);
                if (!shouldContinue)
                    break;

                parse(dialog, page, 0, 1+firstEntry);
                firstEntry += perPage;
            }
            return true;
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (ConnectException e) {
            status.showMessage(Globals.lang("Connection to ACM Portal failed"),
                    Globals.lang("Search ACM Portal"), JOptionPane.ERROR_MESSAGE);
        } catch (IOException e) {
            status.showMessage(Globals.lang(e.getMessage()),
                    Globals.lang("Search ACM Portal"), JOptionPane.ERROR_MESSAGE);
            e.printStackTrace();
        }
        return false;
    }

    private String makeUrl(int startIndex) {
        StringBuffer sb = new StringBuffer(startUrl).append(searchUrlPart);
        sb.append(terms.replaceAll(" ", "%20"));
        sb.append(searchUrlPartII);
        if (acmOrGuide)
            sb.append("ACM");
        else
            sb.append("GUIDE");
        sb.append(endUrl);
        return sb.toString();
    }

    int piv = 0;

    private void parse(ImportInspector dialog, String text, int startIndex, int firstEntryNumber) {
        piv = startIndex;
        int entryNumber = firstEntryNumber;
        BibtexEntry entry;
        while (((entry = parseNextEntry(text, piv, entryNumber)) != null)
            && (shouldContinue)) {
            if (entry.getField("title") != null) {
                dialog.addEntry(entry);
                dialog.setProgress(parsed + unparseable, hits);
                parsed++;
            }
            entryNumber++;
            try {
                  Thread.sleep(10000);//wait between requests or you will be blocked by ACM
            } catch (InterruptedException e) {
                  System.err.println(e.getStackTrace());
            }
        }
    }

    private BibtexEntry parseEntryBibTeX(String fullCitation, boolean abs) throws IOException {
        URL url;
        try {
            url = new URL(startUrl + fullCitation);
            String page = getResults(url);
                  Thread.sleep(10000);//wait between requests or you will be blocked by ACM
                  Matcher bibtexAddr = bibPattern.matcher(page);
                  if (bibtexAddr.find()) {
                        URL bibtexUrl = new URL(startUrl + bibtexAddr.group(1));
                        BufferedReader in = new BufferedReader(new InputStreamReader(bibtexUrl.openStream()));
                        ParserResult result = BibtexParser.parse(in);
                        in.close();
                        Collection<BibtexEntry> item = result.getDatabase().getEntries();
                        BibtexEntry entry = item.iterator().next();
                        if (abs == true) {
                              Matcher absMatch = absPattern.matcher(page);
                              if (absMatch.find()) {
                                    String absBlock = absMatch.group(1);
                                    entry.setField("abstract", convertHTMLChars(absBlock).trim());
                              } else {
                                    System.out.println("No abstract matched.");
                                    //System.out.println(page);
                              }
                        }
                        
                        Thread.sleep(10000);//wait between requests or you will be blocked by ACM
                        return entry;
                  } else
                        return null;
        } catch (MalformedURLException e) {
            e.printStackTrace();
            return null;
        } catch (ConnectException e) {
            e.printStackTrace();
            return null;
            } catch (IOException e) {
                  e.printStackTrace();
                  return null;
            } catch (InterruptedException e) {
                  e.printStackTrace();
                  return null;
            }
    }

    private BibtexEntry parseNextEntry(String allText, int startIndex, int entryNumber) {
        String toFind = new StringBuffer().append("<strong>")
                .append(entryNumber).append("</strong>").toString();
        int index = allText.indexOf(toFind, startIndex);
        int endIndex = allText.indexOf("</table>", index+1);
        //if (endIndex < 0)
            endIndex = allText.length();

        BibtexEntry entry = null;

        if (index >= 0) {
            piv = index+1;
            String text = allText.substring(index, endIndex);
            // Always try RIS import first
                  Matcher fullCitation =
                        fullCitationPattern.matcher(text);
                  if (fullCitation.find()) {
                        try {
                              Thread.sleep(10000);//wait between requests or you will be blocked by ACM
                              entry = parseEntryBibTeX(fullCitation.group(1), fetchAbstract);
                        } catch (Exception e) {
                              e.printStackTrace();
                        }  
                  } else {
                        System.out.printf("Citation Unmatched %d\n", entryNumber);
                        System.out.printf(text);
                  }
            if (entry != null) { // fetch successful
                return entry;
            }
        }
        //System.out.println(allText);
        //System.out.println(toFind);
        //System.out.println("Parse Failed");
        return null;
    }

    /**
     * This method must convert HTML style char sequences to normal characters.
     * @param text The text to handle.
     * @return The converted text.
     */
    private String convertHTMLChars(String text) {

        return htmlConverter.format(text);
    }


    /**
     * Find out how many hits were found.
     * @param page
     */
    private int getNumberOfHits(String page, String marker, Pattern pattern) throws IOException {
        int ind = page.indexOf(marker);
        if (ind < 0) {
            System.out.println(page);
            throw new IOException(Globals.lang("Could not parse number of hits"));
        }
        String substring = page.substring(ind, Math.min(ind + 42, page.length()));
        Matcher m = pattern.matcher(substring);
        if (!m.find()) {
            System.out.println("Unmatched!");
            System.out.println(substring);
        } else {
            try {
                  // get rid of ,
                  String number = m.group(1);
                  //NumberFormat nf = NumberFormat.getInstance();
                  //return nf.parse(number).intValue();
                  number = number.replaceAll(",", "");
                  //System.out.println(number);
                return Integer.parseInt(number);
            } catch (NumberFormatException ex) {
                throw new IOException(Globals.lang("Could not parse number of hits"));
            } catch (IllegalStateException e) {
                throw new IOException(Globals.lang("Could not parse number of hits"));
            }
        }
        throw new IOException(Globals.lang("Could not parse number of hits"));
    }

    /**
     * Download the URL and return contents as a String.
     * @param source
     * @return
     * @throws IOException
     */
    public String getResults(URL source) throws IOException {
        
        InputStream in = source.openStream();
        StringBuffer sb = new StringBuffer();
        byte[] buffer = new byte[256];
        while(true) {
            int bytesRead = in.read(buffer);
            if(bytesRead == -1) break;
            for (int i=0; i<bytesRead; i++)
                sb.append((char)buffer[i]);
        }
        return sb.toString();
    }

    /**
     * Read results from a file instead of an URL. Just for faster debugging.
     * @param f
     * @return
     * @throws IOException
     */
    public String getResultsFromFile(File f) throws IOException {
        InputStream in = new BufferedInputStream(new FileInputStream(f));
        StringBuffer sb = new StringBuffer();
        byte[] buffer = new byte[256];
        while(true) {
            int bytesRead = in.read(buffer);
            if(bytesRead == -1) break;
            for (int i=0; i<bytesRead; i++)
                sb.append((char)buffer[i]);
        }
        return sb.toString();
    }

      public String getTitle() {
          return Globals.menuTitle("Search ACM Portal");
      }
      
      
      public URL getIcon() {
          return GUIGlobals.getIconUrl("www");
      }
      
      public String getHelpPage() {
          return "ACMPortalHelp.html";
      }
      
      public String getKeyName() {
          return "Search ACM Portal";
      }
      
      // This method is called by the dialog when the user has cancelled the import.
      public void cancelled() {
          shouldContinue = false;
      }
      
      // This method is called by the dialog when the user has selected the
      //wanted entries, and clicked Ok. The callback object can update status
      //line etc.
      public void done(int entriesImported) {
          //System.out.println("Number of entries parsed: "+parsed);
          //System.out.println("Parsing failed for "+unparseable+" entries");
      }
      
      // This method is called by the dialog when the user has cancelled or
      //signalled a stop. It is expected that any long-running fetch operations
      //will stop after this method is called.
      public void stopFetching() {
          shouldContinue = false;
      }

    
}

Generated by  Doxygen 1.6.0   Back to index