Integration with CI (Jenkins)

This article briefly describes how to use DBpedia Databus with continuous integration (CI) systems such as Jenkins and provides several example Jenkins pipelines.

Publishing your data files (datasets) into Databus.

You can use CI tools to publish the metadata of your data to Databus. Here is an example of a Jenkins pipeline that does this:

// Databus DataID template for publishing (this is a minimal version).
// It describes one version of an artifact that consists of a single file.
//
// Parameters:
//   downloadUrl - URL from which the published file can be downloaded
//   username    - Databus username; first path segment of the version IRI
//   artifact    - artifact name; path segment after the group
//   version     - version identifier; last path segment and "hasVersion" value
//   licenseUrl  - IRI of the license that applies to the data
//   group       - group name; defaults to "jenkins", the value that used to be
//                 hard-coded in the version IRI
//
// Returns the DataID as a (compact) JSON string.
//
// The document is serialized with JsonOutput instead of being assembled by
// string interpolation, so quotes or backslashes inside an argument are
// escaped and the result is always well-formed JSON.
def req(downloadUrl, username, artifact, version, licenseUrl, group = "jenkins"){
    def dataId = [
        "@context": "https://downloads.dbpedia.org/databus/context.jsonld",
        "@graph": [
            [
                "@type": "Version",
                // toString() turns the interpolated GStrings into plain Strings before serialization
                "@id": "https://databus.dbpedia.org/${username}/${group}/${artifact}/${version}".toString(),
                "hasVersion": "${version}".toString(),
                "title": "Test jenkins",
                "description": "Test jenkins",
                "license": "${licenseUrl}".toString(),
                // here we are publishing only one file
                "distribution": [
                    [
                        "@type": "Part",
                        "formatExtension": "txt",
                        "compression": "none",
                        "downloadURL": "${downloadUrl}".toString()
                    ]
                ]
            ]
        ]
    ]
    return groovy.json.JsonOutput.toJson(dataId)
}

// Declarative pipeline with three stages: create a demo file, upload it over
// SSH to a web server so that it becomes downloadable, and publish the file's
// metadata (DataID) to Databus.
// NOTE(review): BUILD_DATE is not defined anywhere in this script; presumably
// it is injected into the build environment by a plugin or job configuration
// -- confirm before reusing the pipeline.
pipeline {
    agent any
    stages {
        stage("Generate data"){
            steps{
                // we create a file for demonstration purposes
                script {
                    sh "echo 'Hello World!' > 'jenkins-test-file-${BUILD_DATE}-${BUILD_NUMBER}.txt'"
                }
            }
        }
        // we transfer the file to an nginx www location, so the file becomes downloadable
        stage('SSH transfer') {
            steps([$class: 'BapSshPromotionPublisherPlugin']) {
                sshPublisher(
                    continueOnError: false, failOnError: true,
                    publishers: [
                        sshPublisherDesc(
                            configName: "nginx",
                            verbose: true,
                            transfers: [
                                // every *.txt file matched by the pattern is uploaded
                                sshTransfer(sourceFiles: "*.txt", remoteDirectory: "jenkins-test/${BUILD_DATE}")
                            ]
                        )
                    ]
                )
            }
        }
        // we publish the file to Databus, specifying its download link
        stage("Publish to Databus"){
            steps{
                script{
                    // USERNAME is your Databus username, PASSWORD is your Databus API key
                    withCredentials([usernamePassword(credentialsId: 'DBUS-Kikiriki', usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]){
                        def body = req(
                                // download URI
                                "http://test.dbpedia.org/data/jenkins-test/${BUILD_DATE}/jenkins-test-file-${BUILD_DATE}-${BUILD_NUMBER}.txt",
                                // your Databus username
                                USERNAME,
                                "jenkins",
                                // you specify this as a Databus version
                                "${BUILD_DATE}-${BUILD_NUMBER}",
                                "https://dalicc.net/licenselibrary/Apache-2.0"
                                )
                        echo """DataID: 
                        ${body}"""

                        // any response code other than 200 fails the step
                        def response = httpRequest validResponseCodes: "200",
                            consoleLogResponseBody: true,
                            httpMode: 'POST', quiet: true,
                            requestBody: body,
                            url: "https://databus.dbpedia.org/api/publish",
                            customHeaders: [
                                // here is your Databus API key; maskValue keeps it out of the build log
                                [name: 'X-API-KEY', value: PASSWORD, maskValue: true],
                                [name: "Content-Type", value: "application/ld+json"]
                            ]

                        // report both the HTTP status code and the body returned by the API
                        echo "Status: ${response.status}, response: ${response.content}"
                    }
                }
            }
        }
    }
}

Downloading data files (datasets) from Databus.

Here is a sample script that shows how to download the latest version of an artifact from Databus in a Jenkins pipeline:

// A template for the SPARQL query.
// We query 1 file of the latest version of an artifact.
//
// Parameters:
//   artifact - IRI of the Databus artifact; it is inserted verbatim between
//              "<" and ">" in the query
//
// Returns the query text with the artifact IRI filled in.
//
// !!! NOTE that it queries only one file (LIMIT 1); in our case, with a 1-file artifact, it works.
// NOTE(review): versions are ordered by their string value (STR(?v)), so a
// version ending in "-10" sorts before one ending in "-9" -- confirm that this
// matches your version naming scheme.
def req(artifact){
    return """
        PREFIX dcat:   <http://www.w3.org/ns/dcat#>
        PREFIX databus: <https://dataid.dbpedia.org/databus#>
        PREFIX dct: <http://purl.org/dc/terms/>
        
        SELECT ?file WHERE
        {
                GRAPH ?g
                {
                        ?dataset databus:artifact <${artifact}> .
                        ?dataset dct:hasVersion ?v . 
                        ?dataset dcat:distribution ?distribution .
                        ?distribution databus:file ?file .
                }
        } 
        ORDER BY DESC (STR(?v)) LIMIT 1
        """
}


// Declarative pipeline with a single stage: ask the Databus SPARQL endpoint
// for the file of the latest version of an artifact and download it with curl.
pipeline {
    agent any
    stages {
        stage("latest artifact file"){
            steps{
                script{
                    def body = req(
                            "https://databus.dbpedia.org/kikiriki/jenkins/jenkins"
                            )
                    // wrap the query in JSON (x-www-urlencoded also works)
                    def jsonBody = new groovy.json.JsonBuilder(query: body).toPrettyString()
                    echo "Query is: \n${body}"

                    // send a POST HTTP request to the Databus SPARQL endpoint;
                    // any response code other than 200 fails the step
                    def response = httpRequest validResponseCodes: "200",
                        consoleLogResponseBody: true,
                        httpMode: 'POST', quiet: true,
                        requestBody: jsonBody,
                        url: "https://databus.dbpedia.org/sparql",
                        customHeaders: [
                            [name: "Content-Type", value: "application/json"],
                            [name: "Accept", value: "text/csv"]
                        ]
                    // if we configure Accept: text/csv the endpoint returns this:
                    // "file"
                    // "https://databus.dbpedia.org/kikiriki/jenkins/jenkins/2024-04-09-9/jenkins.txt"
                    echo "Response: ${response.content}"

                    // line 0 is the CSV header, line 1 is the first (and only) result row;
                    // without this check an empty result would end in an index error
                    def rows = response.content.split('\n')
                    if (rows.length < 2 || !rows[1].trim()) {
                        error "The SPARQL query returned no file for the requested artifact"
                    }
                    // we extract the URI from the response
                    def fn = rows[1].replaceAll('"', '').trim()

                    // the URI is passed to a shell command below, so refuse anything that is
                    // not a plain http(s) URI free of whitespace, quotes and backslashes
                    if (!(fn ==~ /https?:\/\/[^\s'"\\]+/)) {
                        error "Unexpected download URI in the SPARQL response: ${fn}"
                    }

                    echo "Download URI: ${fn}"
                    // we can use the URI to download the file using curl;
                    // single quotes stop the shell from interpreting characters such as & or ?
                    sh "curl -O '${fn}'"
                }
            }
        }
    }
}

Last updated