% NOTE(review): removed GitHub page chrome and line-number gutter (web-scraping
% artifacts, not part of the bibliography). BibTeX content begins below.
% This file was created with JabRef 2.10.
% Encoding: UTF-8
@InProceedings{ISWC_2016_Resources_GERBILQA,
  Title = {Benchmarking Question Answering Systems},
  Author = {Ricardo Usbeck and Michael R{\"o}der and Christina Unger and Michael Hoffmann and Christian Demmler and Jonathan Huthmann and Axel-Cyrille {Ngonga Ngomo}},
  Booktitle = {Submitted to ISWC 2016 Resource Track},
  Year = {2016},
  Keywords = {sys:relevantFor:infai sys:relevantFor:bis ermilov ngonga roeder simba usbeck group_aksw gerbil hobbit diesel qamel}
}
@InProceedings{ISWC_2016_Ginseng,
  Title = {Ginseng -- A Generic RDF Data Access Interface},
  Author = {Timofey Ermilov and Diego Moussallem and Ricardo Usbeck and Axel-Cyrille {Ngonga Ngomo}},
  Booktitle = {Submitted to ISWC 2016},
  Year = {2016},
  Keywords = {sys:relevantFor:infai sys:relevantFor:bis ermilov ngonga moussallem simba usbeck group_aksw hawk diesel qamel}
}
@InProceedings{BENGAL_2016,
  Title = {Automatic Generation of Benchmarks for Entity Recognition and Linking},
  Author = {Michael R{\"o}der and Axel-Cyrille {Ngonga Ngomo} and Diego Moussallem and Ricardo Usbeck},
  Booktitle = {ACL},
  Year = {2016},
  Keywords = {sys:relevantFor:infai sys:relevantFor:bis ermilov ngonga simba usbeck group_aksw hawk}
}
@InProceedings{HAWK_ISWC_demo_2015,
  Title = {Hybrid Question Answering with {HAWK}},
  Author = {Ricardo Usbeck and Ivan Ermilov and Axel-Cyrille {{Ngonga Ngomo}}},
  Booktitle = {Demo at ISWC 2015},
  Year = {2015},
  Keywords = {sys:relevantFor:infai sys:relevantFor:bis ermilov ngonga simba usbeck group_aksw hawk}
}
@InProceedings{Brunetti2013,
  Title = {Formal Linked Data Visualization Model},
  Author = {Josep Maria Brunetti and S{\"o}ren Auer and Roberto Garc{\'\i}a and Jakub Kl{\'\i}mek and Martin Ne{\v c}ask{\'y}},
  Booktitle = {Submitted to 12th International Semantic Web Conference, 21-25 October 2013, Sydney, Australia},
  Year = {2013},
  Abstract = {In the last years, the amount of semantic data available in the Web has increased dramatically. The potential of this vast amount of data is enormous but in most cases it is very difficult for users to explore and use this data, especially for those without experience with Semantic Web technologies. Applying information visualization techniques to the Semantic Web helps users to easily explore large amounts of data and interact with them. In this article we devise a formal Linked Data Visualization Model (LDVM), which allows to \emph{dynamically} connect data with visualizations. We report about our comprehensive implementation of the LDVM comprising a library of generic visualizations that enable both users and data analysts to get an overview on, visualize and explore the Data Web and perform detailed analyzes on Linked Data.},
  Bdsk-url-1 = {http://svn.aksw.org/papers/2013/WWW_LDVM/iswc2013/public.pdf},
  Keywords = {auer group_aksw sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 lod2page 2013 event_ISWC},
  Owner = {soeren},
  Timestamp = {2013.06.01},
  Url = {http://svn.aksw.org/papers/2013/WWW_LDVM/iswc2013/public.pdf}
}
@InProceedings{ermilov-i-2013--a,
  Title = {{Crowd-Sourcing} the {Large-Scale} {Semantic} {Mapping} of {Tabular} {Data}},
  Author = {Ivan Ermilov and S{\"o}ren Auer and Claus Stadler},
  Booktitle = {To appear in Proceedings of 9th International Conference on Semantic Systems, I-SEMANTICS '13, Graz, Austria, September 4-6, 2013},
  Year = {2013},
  Publisher = {ACM},
  Abstract = {Governments and public administrations started recently to publish large amounts of structured data on the Web, mostly in the form of tabular data such as CSV files or Excel sheets. Various tools and projects have been launched aiming at facilitating the lifting of tabular data to reach semantically structured and linked data. However, none of these tools supported a truly incremental, pay-as-you-go data publication and mapping strategy, which enables effort sharing between data owners, community experts and consumers. In this article, we present an approach for enabling the crowd-sourcing of the large-scale semantic mapping of tabular data. We devise a simple mapping language for tabular data, which is easy to understand even for casual users, but expressive enough to cover the vast majority of potential tabular mappings use cases. Default mappings are automatically created and can be revised by the community using a semantic wiki. The mappings are executed using a sophisticated streaming RDB2RDF conversion. We report about the deployment of our approach at the Pan-European data portal PublicData.eu, where we transformed and enriched almost 10,000 datasets accounting for 7.3 billion triples.},
  Bdsk-url-1 = {http://svn.aksw.org/papers/2013/ISemantics_CSV2RDF/public.pdf},
  Keywords = {2012 group_aksw iermilov auer stadler sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 lod2page peer-reviewed},
  Owner = {ivan},
  Timestamp = {2013.06.01},
  Url = {http://svn.aksw.org/papers/2013/ISemantics_CSV2RDF/public.pdf}
}
@InProceedings{Ermilov2013,
  Title = {Enabling Linked Data access to the Internet of Things},
  Author = {Timofey Ermilov and S{\"o}ren Auer},
  Booktitle = {Submitted to 12th International Semantic Web Conference, 21-25 October 2013, Sydney, Australia},
  Year = {2013},
  Abstract = {The term Internet of Things refers to the vision, that all kinds of physical objects are uniquely identifiable and have a virtual representation on the Internet. We present an approach for equipping embedded and smart devices on the Internet of Things with a Linked Data interface. The approach is based on mapping existing structured data on the device to vocabularies and ontologies and exposing this information as dereferencable RDF directly from within the device. As a result, all smart devices (e.g. tablets, smartphones, TVs) can easily provide standardized structured information and become first class citizens on the Data Web. A particular specific requirement when dealing with smart and embedded devices are resource constraints. Our evaluation shows, that the overhead introduced by equipping a device with a Linked Data interface is neglectable given modern software and hardware environments.},
  Bdsk-url-1 = {http://svn.aksw.org/papers/2013/ISWC_LinkedDataInternetOfThings/public.pdf},
  Keywords = {ermilov auer group_aksw sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 lod2page 2013 event_ISWC},
  Owner = {soeren},
  Timestamp = {2013.06.01},
  Url = {http://svn.aksw.org/papers/2013/ISWC_LinkedDataInternetOfThings/public.pdf}
}
@InProceedings{Ermilov+2013,
  Title = {LODStats -- Large Scale Dataset Analytics for Linked Open Data},
  Author = {Ivan Ermilov and Jan Demter and Michael Martin and Jens Lehmann and S{\"o}ren Auer},
  Booktitle = {Submitted to 12th International Semantic Web Conference, 21-25 October 2013, Sydney, Australia},
  Year = {2013},
  Abstract = {In order to reuse, link, revise or query data made available on the Web, it is important to know the structure, size and coverage of it. To achieve this, we developed and evaluated LODStats -- a statement-stream-based approach for gathering comprehensive statistics about data adhering to the RDF data model. LODStats is based on the declarative description of statistical dataset characteristics. Its main advantages over related approaches are a smaller memory footprint and significantly better performance and scalability. We integrated LODStats with CKAN and obtained a comprehensive picture of the current state of a significant part of the Data Web. This analysis is regularly published and enhanced over the past two years at the public platform stats.lod2.eu.},
  Bdsk-url-1 = {http://svn.aksw.org/papers/2013/ISWC_LODStats/public.pdf},
  Keywords = {ermilov lehmann martin auer group_aksw sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 lod2page 2013 event_ISWC},
  Owner = {soeren},
  Timestamp = {2013.06.01},
  Url = {http://svn.aksw.org/papers/2013/ISWC_LODStats/public.pdf}
}
@InProceedings{Kontokostas2013,
  Title = {Test-driven Data Quality Evaluation for {SPARQL} Endpoints},
  Author = {Dimitris Kontokostas and S{\"o}ren Auer and Sebastian Hellmann and Jens Lehmann and Patrick Westphal and Roland Cornelissen and Amrapali Zaveri},
  Booktitle = {Submitted to 12th International Semantic Web Conference, 21-25 October 2013, Sydney, Australia},
  Year = {2013},
  Abstract = {Linked Open Data (LOD) comprises of an unprecedented volume of structured data on the Web. However, these datasets are of varying quality ranging from extensively curated datasets to crowd-sourced or extracted data of often relatively low quality. In this paper we, present a methodology for test-driven data quality assessment, which is inspired by test-driven software development. We argue, that knowledge bases should be accompanied by a number of test-cases, which help to ensure a basic level of quality. We present a methodology for assessing the quality of linked data resources, based on a formalization of bad smells and data quality problems. Our formalization employs SPARQL query templates, which are instantiated into concrete quality test queries. Based on an extensive literature review, we compile a comprehensive library of quality test patterns. The main contribution of our work is an extensive, unprecedented evaluation of DBpedia data quality employing our test methodology. One of the main advantages of our approach is that domain specific semantics can be encoded in the data quality test cases, thus being able to discover data quality problems beyond conventional quality heuristics.},
  Bdsk-url-1 = {http://svn.aksw.org/papers/2013/ISWC_Databugger/public.pdf},
  Keywords = {zaveri auer lehmann hellmann kontokostas group_aksw sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 lod2page 2013 event_ISWC},
  Owner = {soeren},
  Timestamp = {2013.06.01},
  Url = {http://svn.aksw.org/papers/2013/ISWC_Databugger/public.pdf}
}
@Article{RDFSlice_efficient,
  Title = {Towards an Efficient {RDF} Dataset Slicing},
  Author = {Edgard Marx and Tommaso Soru and Saeedeh Shekarpour and S{\"o}ren Auer and Axel-Cyrille {Ngonga Ngomo} and Karin Breitman},
  Journal = {Submitted to the International Journal of Semantic Computing},
  Year = {2013},
  Abstract = {Over the last years, a considerable amount of structured data has been published on the Web as Linked Open Data (LOD). Despite recent advances, consuming and using Linked Open Data within an organization is still a substantial challenge. Many of the LOD datasets are quite large and despite progress in RDF data management their loading and querying within a triple store is extremely time-consuming and resource-demanding. To overcome this consumption obstacle, we propose a process inspired by the classical Extract-Transform-Load (ETL) paradigm. In this article, we focus particularly on the selection and extraction steps of this process. We devise a fragment of SPARQL dubbed SliceSPARQL, which enables the selection of well-defined slices of datasets fulfilling typical information needs. SliceSPARQL supports graph patterns for which each connected subgraph pattern involves a maximum of one variable or IRI in its join conditions. This restriction guarantees the efficient processing of the query against a sequential dataset dump stream. Furthermore, we evaluate our slicing approach on three different optimization strategies. Results show that dataset slices can be generated an order of magnitude faster than by using the conventional approach of loading the whole dataset into a triple store.},
  Keywords = {shekarpour marx ngonga auer soru group_aksw sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 lod2page 2013 event_ICSC},
  Owner = {Edgard},
  Timestamp = {2013.12.05},
  Url = {http://svn.aksw.org/papers/2013/IJSC/ws-ijsc.pdf}
}
@InProceedings{largerdfbench2015,
  Title = {{LargeRDFBench}: A Billion Triples Benchmark for {SPARQL} Endpoint Federation},
  Author = {Muhammad Saleem and Ali Hasnain and Axel-Cyrille {{Ngonga Ngomo}}},
  Booktitle = {Submitted to the ISWC 2015},
  Year = {2015},
  Keywords = {saleem ngonga simba quetsal group_aksw sys:relevantFor:infai},
  Url = {http://svn.aksw.org/papers/2015/ESWC_LargeRDFBench/public.pdf}
}
@Article{SINA_WebSemantic,
  Title = {{SINA}: Semantic Interpretation of User Queries for Question Answering on Interlinked Data},
  Author = {Saeedeh Shekarpour and Edgard Marx and Axel-Cyrille {Ngonga Ngomo} and S{\"o}ren Auer},
  Journal = {Submitted to Journal of Web Semantics},
  Year = {2013},
  Abstract = {The architectural choices underlying Linked Data have led to a compendium of data sources which contain both duplicated and fragmented information on a large number of domains. One way to enable non-experts users to access this data compendium is to provide keyword search frameworks that can capitalize on the inherent characteristics of Linked Data. Developing such systems is challenging for three main reasons. First, resources across different datasets or even within the same dataset can be homonyms. Second, different datasets employ heterogeneous schemas and each one may only contain a part of the answer for a certain user query. Finally, constructing a federated formal query from keywords across different datasets requires exploiting links between the different datasets on both the schema and instance levels. We present Sina, a scalable keyword search system that can answer user queries by transforming user-supplied keywords or natural-languages queries into conjunctive SPARQL queries over a set of interlinked data sources. Sina uses a hidden Markov model to determine the most suitable resources for a user-supplied query from different datasets. Moreover, our framework is able to construct federated queries by using the disambiguated resources and leveraging the linking structure underlying the datasets to query. We evaluate Sina over three different datasets. Sina can answer 25 queries from the QALD-1 correctly; furthermore, it performs as well as the best system from the QALD-3 competition by answering 32 questions correctly. More importantly, it is capable of answering queries on interlinked distributed sources. Moreover, we study the runtime of SINA in its sequential as well as parallel implementations and draw conclusions on the scalability of keyword search on Linked Data.},
  Keywords = {shekarpour marx ngonga auer group_aksw sys:relevantFor:infai sys:relevantFor:bis sys:relevantFor:lod2 lod2page 2013 event_ICSC},
  Owner = {Saeedeh},
  Timestamp = {2013.06.20},
  Url = {http://svn.aksw.org/papers/2013/WebSemantic_SINA/public.pdf}
}
@InProceedings{sherif-semanticquran,
  Title = {Semantic {Quran}: a Multilingual Resource for Natural-Language Processing},
  Author = {Mohamed A. Sherif and Axel-Cyrille {{Ngonga Ngomo}}},
  Booktitle = {Submitted to the Semantic Web Journals special call for Multilingual Linked Open Data (MLOD) 2012},
  Year = {2012},
  Abstract = {In this paper we describe the Semantic Quran dataset, a multilingual RDF representation of translations of the Quran. The dataset was created by integrating data from two different semi-structured sources and aligned to an ontology designed to represent multilingual data from sources with a hierarchical structure. The resulting RDF data encompasses 43 different languages which belong to the most under-represented languages in the Linked Data Cloud, including Arabic, Amharic and Amazigh. We designed the dataset to be easily usable in natural-language processing applications with the goal of facilitating the development of knowledge extraction tools for these languages. In particular, the Semantic Quran is compatible with the Natural-Language Interchange Format and contains explicit morpho-syntactic information on the utilized terms. We present the ontology devised for structuring the data. We also provide the transformation rules implemented in our extraction framework. Finally, we detail the link creation process as well as possible usage scenarios for the Semantic Quran dataset.},
  Keywords = {sys:relevantFor:infai sys:relevantFor:bis sherif ngonga semanticquran group_aksw nlp2rdf_publications},
  Owner = {sherif},
  Timestamp = {2013.11.07},
  Url = {http://www.semantic-web-journal.net/system/files/swj503.pdf}
}
@Article{sparqlify,
  Title = {Connecting Crowd-sourced Spatial Information to the Data Web with Sparqlify},
  Author = {Claus Stadler and J{\"o}rg Unbehauen and Jens Lehmann and S{\"o}ren Auer},
  Journal = {VLDB Special Issue on Structured, Social and Crowd-sourced Data on the Web (under review)},
  Year = {2012},
  Keywords = {sys:relevantFor:bis sys:relevantFor:infai stadler lehmann unbehauen auer 2012 lgd sparqlify},
  Owner = {stadler},
  Timestamp = {2012.11.14},
  Url = {http://www.semantic-web-journal.net/sites/default/files/swj173_2.pdf}
}