Logo Search packages:      
Sourcecode: jabref version File versions  Download package

OAI2Fetcher.java

package net.sf.jabref.imports;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Date;

import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import net.sf.jabref.BibtexEntry;
import net.sf.jabref.BibtexEntryType;
import net.sf.jabref.GUIGlobals;
import net.sf.jabref.Globals;
import net.sf.jabref.OutputPrinter;
import net.sf.jabref.Util;

import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * 
 * This class can be used to access any archive offering an OAI2 interface. By
 * default it will access ArXiv.org
 * 
 * @author Ulrich Stärk
 * @author Christian Kopf
 * 
 * @version $Revision: 3203 $ ($Date: 2010-03-04 19:05:17 +0100 (Thu, 04 Mar 2010) $)
 * 
 */
00038 public class OAI2Fetcher implements EntryFetcher {

    public static final String OAI2_ARXIV_PREFIXIDENTIFIER = "oai%3AarXiv.org%3A";

    public static final String OAI2_ARXIV_HOST = "export.arxiv.org";

    public static final String OAI2_ARXIV_SCRIPT = "oai2";

    public static final String OAI2_ARXIV_METADATAPREFIX = "arXiv";

    public static final String OAI2_ARXIV_ARCHIVENAME = "ArXiv.org";

    public static final String OAI2_IDENTIFIER_FIELD = "oai2identifier";

    private SAXParserFactory parserFactory;

    private SAXParser saxParser;

    private String oai2Host;

    private String oai2Script;

    private String oai2MetaDataPrefix;

    private String oai2PrefixIdentifier;

    private String oai2ArchiveName;

    private boolean shouldContinue = true;

    private OutputPrinter status;

    /**
     * some archives - like arxive.org - might expect of you to wait some time 
     */
00073     private boolean shouldWait() {
        return waitTime > 0;
    }

    private long waitTime = -1;

    private Date lastCall;

    /**
     * 
     * 
     * @param oai2Host
     *            the host to query without leading http:// and without trailing /
     * @param oai2Script
     *            the relative location of the oai2 interface without leading
     *            and trailing /
     * @param oai2Metadataprefix
     *            the urlencoded metadataprefix
     * @param oai2Prefixidentifier
     *            the urlencoded prefix identifier
     * @param waitTimeMs
     *            Time to wait in milliseconds between query-requests.
     */
00096     public OAI2Fetcher(String oai2Host, String oai2Script, String oai2Metadataprefix,
        String oai2Prefixidentifier, String oai2ArchiveName, long waitTimeMs) {
        this.oai2Host = oai2Host;
        this.oai2Script = oai2Script;
        this.oai2MetaDataPrefix = oai2Metadataprefix;
        this.oai2PrefixIdentifier = oai2Prefixidentifier;
        this.oai2ArchiveName = oai2ArchiveName;
        this.waitTime = waitTimeMs;
        try {
            parserFactory = SAXParserFactory.newInstance();
            saxParser = parserFactory.newSAXParser();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        }
    }

    /**
     * Default Constructor. The archive queried will be ArXiv.org
     * 
     */
00118     public OAI2Fetcher() {
        this(OAI2_ARXIV_HOST, OAI2_ARXIV_SCRIPT, OAI2_ARXIV_METADATAPREFIX,
            OAI2_ARXIV_PREFIXIDENTIFIER, OAI2_ARXIV_ARCHIVENAME, 20000L);
    }

    /**
     * Construct the query URL
     * 
     * @param key
     *            The key of the OAI2 entry that the url should point to.
     *            
     * @return a String denoting the query URL
     */
00131     public String constructUrl(String key) {
        String identifier = "";
        try {
            identifier = URLEncoder.encode(key, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            return "";
        }
        StringBuffer sb = new StringBuffer("http://").append(oai2Host).append("/");
        sb.append(oai2Script).append("?");
        sb.append("verb=GetRecord");
        sb.append("&identifier=");
        sb.append(oai2PrefixIdentifier);
        sb.append(identifier);
        sb.append("&metadataPrefix=").append(oai2MetaDataPrefix);
        return sb.toString();
    }
    
    /**
     * Strip subcategories from ArXiv key.
     * 
     * @param key The key to fix.
     * @return Fixed key.
     */
00154     public static String fixKey(String key){
        
        if (key.toLowerCase().startsWith("arxiv:")){
            key = key.substring(6);
        }
        
        int dot = key.indexOf('.');
        int slash = key.indexOf('/');
        
        if (dot > -1 && dot < slash)
            key = key.substring(0, dot) + key.substring(slash, key.length());
    
        return key;
    }

    public static String correctLineBreaks(String s){
        s = s.replaceAll("\\n(?!\\s*\\n)", " ");
        s = s.replaceAll("\\s*\\n\\s*", "\n");
        return s.replaceAll(" {2,}", " ").replaceAll("(^\\s*|\\s+$)", "");
    }
    
    /**
     * Import an entry from an OAI2 archive. The BibtexEntry provided has to
     * have the field OAI2_IDENTIFIER_FIELD set to the search string.
     * 
     * @param key
     *            The OAI2 key to fetch from ArXiv.
     * @return The imnported BibtexEntry or null if none.
     */
00183     public BibtexEntry importOai2Entry(String key) {
        /**
         * Fix for problem reported in mailing-list: 
         *   https://sourceforge.net/forum/message.php?msg_id=4087158
         */
        key = fixKey(key);
        
        String url = constructUrl(key);
        try {
            URL oai2Url = new URL(url);
            HttpURLConnection oai2Connection = (HttpURLConnection) oai2Url.openConnection();
            oai2Connection.setRequestProperty("User-Agent", "Jabref");
            InputStream inputStream = oai2Connection.getInputStream();
    
            /* create an empty BibtexEntry and set the oai2identifier field */
            BibtexEntry be = new BibtexEntry(Util.createNeutralId(), BibtexEntryType.ARTICLE);
            be.setField(OAI2_IDENTIFIER_FIELD, key);
            DefaultHandler handlerBase = new OAI2Handler(be);
            /* parse the result */
            saxParser.parse(inputStream, handlerBase);
            
            /* Correct line breaks and spacing */
            for (String name : be.getAllFields()){
                be.setField(name, OAI2Fetcher.correctLineBreaks(be.getField(name).toString()));
            }
            
            if (key.matches("\\d\\d\\d\\d\\..*")){
                be.setField("year", "20" + key.substring(0,2));
                
                int month = Integer.parseInt(key.substring(2,4));
                if (month >= 1 && month <= 12){
                    be.setField("month", "#" + Globals.MONTHS[month - 1] + "#");
                }
            }
            
            return be;
        } catch (IOException e) {
            status.showMessage(Globals.lang(
                "An Exception ocurred while accessing '%0'", url)
                + "\n\n" + e.toString(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
        } catch (SAXException e) {
            status.showMessage(Globals.lang(
                "An SAXException ocurred while parsing '%0':", new String[]{url})
                + "\n\n" + e.getMessage(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
        } catch (RuntimeException e){
            status.showMessage(Globals.lang(
                "An Error occurred while fetching from OAI2 source (%0):", new String[]{url})
                + "\n\n" + e.getMessage(), Globals.lang(getKeyName()), JOptionPane.ERROR_MESSAGE);
        } 
        return null;
    }

00235     public String getHelpPage() {
        // there is no helppage
        return null;
    }

00240     public URL getIcon() {
        return GUIGlobals.getIconUrl("www");
    }

00244     public String getKeyName() {
        return "Fetch " + oai2ArchiveName;
    }

00248     public JPanel getOptionsPanel() {
        // we have no additional options
        return null;
    }

00253     public String getTitle() {
        return Globals.menuTitle(getKeyName());
    }

00257     public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) {

        this.status = status;

        try {
            shouldContinue = true;
            
            /* multiple keys can be delimited by ; or space */
            query = query.replaceAll(" ", ";");
            String[] keys = query.split(";");
            for (int i = 0; i < keys.length; i++) {
                String key = keys[i];
                
                /*
                 * some archives - like arxive.org - might expect of you to wait
                 * some time
                 */
                if (shouldWait() && lastCall != null) {

                    long elapsed = new Date().getTime() - lastCall.getTime();

                    while (elapsed < waitTime) {
                        status.setStatus(Globals.lang("Waiting for ArXiv...") + ((waitTime - elapsed) / 1000) + " s");
                        Thread.sleep(1000);
                        elapsed = new Date().getTime() - lastCall.getTime();
                    }
                }

                status.setStatus(Globals.lang("Processing ") + key);

                /* the cancel button has been hit */
                if (!shouldContinue)
                    break;
                
                /* query the archive and load the results into the BibtexEntry */
                BibtexEntry be = importOai2Entry(key);

                if (shouldWait())
                    lastCall = new Date();
                
                /* add the entry to the inspection dialog */
                if (be != null)
                    dialog.addEntry(be);

                /* update the dialogs progress bar */
                dialog.setProgress(i + 1, keys.length);
            }
            
            return true;
        } catch (Exception e) {
            status.setStatus(Globals.lang("Error while fetching from OIA2")+ ": " + e.getMessage());
            e.printStackTrace();
        }
        return false;
    }

    public void stopFetching() {
        shouldContinue = false;
    }
}

Generated by  Doxygen 1.6.0   Back to index