1. Glass, B, 1957, New missing link discovered.
BibTeX
@article{glass1957new1,
  author  = {Glass, B.},
  title   = {New missing link discovered},
  journal = {Science},
  volume  = {126},
  pages   = {158--159},
  year    = {1957},
  note    = {talkorigins\_source = {true}; raw\_reference = {Glass, B., 1957, New missing link discovered: Science, v. 126, p. 158-159.}},
}
2. Ester, Martin and Kriegel, Hans‐Peter and Sander, Jörg and Xu, Xiaowei, 1996, A density-based algorithm for discovering clusters in large spatial Databases with Noise.
Abstract
Clustering algorithms are attractive for the task of class identification in spatial databases. However, the application to large spatial databases rises the following requirements for clustering algorithms: minimal requirements of domain knowledge to determine the input parameters, discovery of clusters with arbitrary shape and good efficiency on large databases. The well-known clustering algorithms offer no solution to the combination of these requirements. In this paper, we present the new clustering algorithm DBSCAN relying on a density-based notion of clusters which is designed to discover clusters of arbitrary shape. DBSCAN requires only one input parameter and supports the user in determining an appropriate value for it. We performed an experimental evaluation of the effectiveness and efficiency of DBSCAN using synthetic data and real data of the SEQUOIA 2000 benchmark. The results of our experiments demonstrate that (1) DBSCAN is significantly more effective in discovering clusters of arbitrary shape than the well-known algorithm CLARANS, and that (2) DBSCAN outperforms CLARANS by a factor of more than 100 in terms of efficiency.
BibTeX
@article{openalexw1673310716,
  author   = {Ester, Martin and Kriegel, Hans-Peter and Sander, Jörg and Xu, Xiaowei},
  title    = {A density-based algorithm for discovering clusters in large spatial Databases with Noise},
  year     = {1996},
  abstract = {Clustering algorithms are attractive for the task of class identification in spatial databases. However, the application to large spatial databases rises the following requirements for clustering algorithms: minimal requirements of domain knowledge to determine the input parameters, discovery of clusters with arbitrary shape and good efficiency on large databases. The well-known clustering algorithms offer no solution to the combination of these requirements. In this paper, we present the new clustering algorithm DBSCAN relying on a density-based notion of clusters which is designed to discover clusters of arbitrary shape. DBSCAN requires only one input parameter and supports the user in determining an appropriate value for it. We performed an experimental evaluation of the effectiveness and efficiency of DBSCAN using synthetic data and real data of the SEQUOIA 2000 benchmark. The results of our experiments demonstrate that (1) DBSCAN is significantly more effective in discovering clusters of arbitrary shape than the well-known algorithm CLARANS, and that (2) DBSCAN outperforms CLARANS by a factor of more than 100 in terms of efficiency.},
  openalex = {W1673310716},
}
3. Gibson, David and Kleinberg, Jon and Raghavan, Prabhakar, 1998, Inferring Web communities from link topology.
Abstract
The World Wide Web grows through a decentralized, almost anarchic process, and this has resulted in a large hyperlinked corpus without the kind of logical organization that can be built into more traditionally-created hypermedia. To extract meaningful structure under such circumstances, we develop a notion of hyperlinked communities on the www through an analysis of the link topology. By invoking a simple, mathematically clean method for defining and exposing the structure of these communities, we are able to derive a number of themes: The communities can be viewed as containing a core of central, "authoritative" pages linked together by "hub pages"; and they exhibit a natural type of hierarchical topic generalization that can be inferred directly from the pattern of linkage. Our investigation shows that although the process by which users of the Web create pages and links is very difficult to understand at a "local" level, it results in a much greater degree of orderly high-level stru...
BibTeX
@article{doi101145276627276652,
  author   = {Gibson, David and Kleinberg, Jon and Raghavan, Prabhakar},
  title    = {Inferring Web communities from link topology},
  year     = {1998},
  abstract = {The World Wide Web grows through a decentralized, almost anarchic process, and this has resulted in a large hyperlinked corpus without the kind of logical organization that can be built into more traditionally-created hypermedia. To extract meaningful structure under such circumstances, we develop a notion of hyperlinked communities on the www through an analysis of the link topology. By invoking a simple, mathematically clean method for defining and exposing the structure of these communities, we are able to derive a number of themes: The communities can be viewed as containing a core of central, ``authoritative'' pages linked together by ``hub pages''; and they exhibit a natural type of hierarchical topic generalization that can be inferred directly from the pattern of linkage. Our investigation shows that although the process by which users of the Web create pages and links is very difficult to understand at a ``local'' level, it results in a much greater degree of orderly high-level stru...},
  url      = {https://doi.org/10.1145/276627.276652},
  doi      = {10.1145/276627.276652},
  openalex = {W2020423193},
}
4. Liben‐Nowell, David and Kleinberg, Jon, 2003, The link prediction problem for social networks.
Abstract
Given a snapshot of a social network, can we infer which new interactions among its members are likely to occur in the near future? We formalize this question as the link-prediction problem, and we develop approaches to link prediction based on measures for analyzing the “proximity” of nodes in a network. Experiments on large co-authorship networks suggest that information about future interactions can be extracted from network topology alone, and that fairly subtle measures for detecting node proximity can outperform more direct measures.
BibTeX
@article{doi101145956863956972,
  author     = {Liben-Nowell, David and Kleinberg, Jon},
  title      = {The link prediction problem for social networks},
  year       = {2003},
  abstract   = {Given a snapshot of a social network, can we infer which new interactions among its members are likely to occur in the near future? We formalize this question as the link-prediction problem, and we develop approaches to link prediction based on measures for analyzing the “proximity” of nodes in a network. Experiments on large co-authorship networks suggest that information about future interactions can be extracted from network topology alone, and that fairly subtle measures for detecting node proximity can outperform more direct measures.},
  url        = {https://doi.org/10.1145/956863.956972},
  doi        = {10.1145/956863.956972},
  openalex   = {W2420733993},
  references = {doi101145775047775126},
}
5. Huang, Zan and Li, Xin and Chen, Hsinchun, 2005, Link prediction approach to collaborative filtering.
Abstract
Recommender systems can provide valuable services in a digital library environment, as demonstrated by its commercial success in book, movie, and music industries. One of the most commonly-used and successful recommendation algorithms is collaborative filtering, which explores the correlations within user-item interactions to infer user interests and preferences. However, the recommendation quality of collaborative filtering approaches is greatly limited by the data sparsity problem. To alleviate this problem we have previously proposed graph-based algorithms to explore transitive user-item associations. In this paper, we extend the idea of analyzing user-item interactions as graphs and employ link prediction approaches proposed in the recent network modeling literature for making collaborative filtering recommendations. We have adapted a wide range of linkage measures for making recommendations. Our preliminary experimental results based on a book recommendation dataset show that some of these measures achieved significantly better performance than standard collaborative filtering algorithms.
BibTeX
@article{doi10114510653851065415,
  author   = {Huang, Zan and Li, Xin and Chen, Hsinchun},
  title    = {Link prediction approach to collaborative filtering},
  year     = {2005},
  doi      = {10.1145/1065385.1065415},
  url      = {https://doi.org/10.1145/1065385.1065415},
  openalex = {W2151498529},
  abstract = {Recommender systems can provide valuable services in a digital library environment, as demonstrated by its commercial success in book, movie, and music industries. One of the most commonly-used and successful recommendation algorithms is collaborative filtering, which explores the correlations within user-item interactions to infer user interests and preferences. However, the recommendation quality of collaborative filtering approaches is greatly limited by the data sparsity problem. To alleviate this problem we have previously proposed graph-based algorithms to explore transitive user-item associations. In this paper, we extend the idea of analyzing user-item interactions as graphs and employ link prediction approaches proposed in the recent network modeling literature for making collaborative filtering recommendations. We have adapted a wide range of linkage measures for making recommendations. Our preliminary experimental results based on a book recommendation dataset show that some of these measures achieved significantly better performance than standard collaborative filtering algorithms.},
}
6. Adafre, Sisay Fissaha and de Rijke, Maarten, 2005, Discovering missing links in Wikipedia.
Abstract
In this paper we address the problem of discovering missing hypertext links in Wikipedia. The method we propose consists of two steps: first, we compute a cluster of highly similar pages around a given page, and then we identify candidate links from those similar pages that might be missing on the given page. The main innovation is in the algorithm that we use for identifying similar pages, LTRank, which ranks pages using co-citation and page title information. Both LTRank and the link discovery method are manually evaluated and show acceptable results, especially given the simplicity of the methods and conservativeness of the evaluation criteria.
BibTeX
@article{doi10114511342711134284,
  author     = {Adafre, Sisay Fissaha and de Rijke, Maarten},
  title      = {Discovering missing links in Wikipedia},
  year       = {2005},
  doi        = {10.1145/1134271.1134284},
  url        = {https://doi.org/10.1145/1134271.1134284},
  openalex   = {W2071018679},
  references = {doi101016s1389128699000225, doi101016s1389128699000407, doi101145775047775126, doi101145985692985765, doi105210fmv8i121108, openalexw1596622341, openalexw169460412, openalexw2106750491, openalexw241431267, openalexw2964758656},
  abstract   = {In this paper we address the problem of discovering missing hypertext links in Wikipedia. The method we propose consists of two steps: first, we compute a cluster of highly similar pages around a given page, and then we identify candidate links from those similar pages that might be missing on the given page. The main innovation is in the algorithm that we use for identifying similar pages, LTRank, which ranks pages using co-citation and page title information. Both LTRank and the link discovery method are manually evaluated and show acceptable results, especially given the simplicity of the methods and conservativeness of the evaluation criteria.},
}
7. Hasan, Mohammad Al and Chaoji, Vineet and Salem, Saeed and Zaki, Mohammed J., 2006, Link prediction using supervised learning.
Abstract
Social network analysis has attracted much attention in recent years. Link prediction is a key research direction within this area. In this paper, we study link prediction as a supervised learning task. Along the way, we identify a set of features that are key to the performance under the supervised learning setup. The identified features are very easy to compute, and at the same time surprisingly effective in solving the link prediction problem. We also explain the effectiveness of the features from their class density distribution. Then we compare different classes of supervised learning algorithms in terms of their prediction performance using various performance metrics, such as accuracy, precision-recall, F-values, squared error etc. with a 5-fold cross validation. Our results on two practical social network datasets shows that most of the well-known classification algorithms (decision tree, k-NN, multilayer perceptron, SVM, RBF network) can predict links with comparable performances, but SVM outperforms all of them with narrow margin in all performance measures. Again, ranking of features with popular feature ranking algorithms shows that a small subset of features always plays a significant role in link prediction.
BibTeX
@article{openalexw2768375068,
  author   = {Hasan, Mohammad Al and Chaoji, Vineet and Salem, Saeed and Zaki, Mohammed J.},
  title    = {Link prediction using supervised learning},
  year     = {2006},
  abstract = {Social network analysis has attracted much attention in recent years. Link prediction is a key research direction within this area. In this paper, we study link prediction as a supervised learning task. Along the way, we identify a set of features that are key to the performance under the supervised learning setup. The identified features are very easy to compute, and at the same time surprisingly effective in solving the link prediction problem. We also explain the effectiveness of the features from their class density distribution. Then we compare different classes of supervised learning algorithms in terms of their prediction performance using various performance metrics, such as accuracy, precision-recall, F-values, squared error etc. with a 5-fold cross validation. Our results on two practical social network datasets shows that most of the well-known classification algorithms (decision tree, k-NN, multilayer perceptron, SVM, RBF network) can predict links with comparable performances, but SVM outperforms all of them with narrow margin in all performance measures. Again, ranking of features with popular feature ranking algorithms shows that a small subset of features always plays a significant role in link prediction.},
  openalex = {W2768375068},
}
8. Liben‐Nowell, David and Kleinberg, Jon, 2007, The link‐prediction problem for social networks: Journal of the American Society for Information Science and Technology.
Abstract
Given a snapshot of a social network, can we infer which new interactions among its members are likely to occur in the near future? We formalize this question as the link-prediction problem, and we develop approaches to link prediction based on measures for analyzing the “proximity” of nodes in a network. Experiments on large coauthorship networks suggest that information about future interactions can be extracted from network topology alone, and that fairly subtle measures for detecting node proximity can outperform more direct measures.
BibTeX
@article{doi101002asi20591,
  author     = {Liben-Nowell, David and Kleinberg, Jon},
  title      = {The link-prediction problem for social networks},
  year       = {2007},
  journal    = {Journal of the American Society for Information Science and Technology},
  abstract   = {Given a snapshot of a social network, can we infer which new interactions among its members are likely to occur in the near future? We formalize this question as the link-prediction problem, and we develop approaches to link prediction based on measures for analyzing the “proximity” of nodes in a network. Experiments on large coauthorship networks suggest that information about future interactions can be extracted from network topology alone, and that fairly subtle measures for detecting node proximity can outperform more direct measures.},
  url        = {https://doi.org/10.1002/asi.20591},
  doi        = {10.1002/asi.20591},
  openalex   = {W4232932184},
  references = {doi101002sici10974571199009416391aidasi130co29, doi101007bf02289026, doi1010160306457383900626, doi101016s0048733396009171, doi101016s016975529800110x, doi101016s0378873303000091, doi10103830918, doi101073pnas982404, doi101126science2865439509, doi101137s003614450342480},
}
9. Clauset, Aaron and Moore, Cristopher and Newman, M. E. J., 2008, Hierarchical structure and the prediction of missing links in networks: Nature.
BibTeX
@article{doi101038nature06830,
  author     = {Clauset, Aaron and Moore, Cristopher and Newman, M. E. J.},
  title      = {Hierarchical structure and the prediction of missing links in networks},
  journal    = {Nature},
  year       = {2008},
  doi        = {10.1038/nature06830},
  url        = {https://doi.org/10.1038/nature06830},
  openalex   = {W2157082398},
  references = {doi101002asi20591},
}
10. Volz, Julius and Bizer, Christian and Gaedke, Martin and Kobilarov, Georgi, 2009, Discovering and Maintaining Links on the Web of Data: Lecture notes in computer science.
DOI: 10.1007/978-3-642-04930-9_41
BibTeX
@incollection{doi101007978364204930941,
  author    = {Volz, Julius and Bizer, Christian and Gaedke, Martin and Kobilarov, Georgi},
  title     = {Discovering and Maintaining Links on the Web of Data},
  year      = {2009},
  booktitle = {Lecture Notes in Computer Science},
  url       = {https://doi.org/10.1007/978-3-642-04930-9_41},
  doi       = {10.1007/978-3-642-04930-9_41},
  openalex  = {W1491268609},
}
11. Lü, Linyuan and Jin, Ci-Hang and Zhou, Tao, 2009, Similarity index based on local paths for link prediction of complex networks: Physical Review E.
DOI: 10.1103/physreve.80.046122
Abstract
Predictions of missing links of incomplete networks, such as protein-protein interaction networks or very likely but not yet existent links in evolutionary networks like friendship networks in web society, can be considered as a guideline for further experiments or valuable information for web users. In this paper, we present a local path index to estimate the likelihood of the existence of a link between two nodes. We propose a network model with controllable density and noise strength in generating links, as well as collect data of six real networks. Extensive numerical simulations on both modeled networks and real networks demonstrated the high effectiveness and efficiency of the local path index compared with two well-known and widely used indices: the common neighbors and the Katz index. Indeed, the local path index provides competitively accurate predictions as the Katz index while requires much less CPU time and memory space than the Katz index, which is therefore a strong candidate for potential practical applications in data mining of huge-size networks.
BibTeX
@article{doi101103physreve80046122,
  author   = {Lü, Linyuan and Jin, Ci-Hang and Zhou, Tao},
  title    = {Similarity index based on local paths for link prediction of complex networks},
  journal  = {Physical Review E},
  year     = {2009},
  doi      = {10.1103/physreve.80.046122},
  url      = {https://doi.org/10.1103/physreve.80.046122},
  openalex = {W1564354600},
  abstract = {Predictions of missing links of incomplete networks, such as protein-protein interaction networks or very likely but not yet existent links in evolutionary networks like friendship networks in web society, can be considered as a guideline for further experiments or valuable information for web users. In this paper, we present a local path index to estimate the likelihood of the existence of a link between two nodes. We propose a network model with controllable density and noise strength in generating links, as well as collect data of six real networks. Extensive numerical simulations on both modeled networks and real networks demonstrated the high effectiveness and efficiency of the local path index compared with two well-known and widely used indices: the common neighbors and the Katz index. Indeed, the local path index provides competitively accurate predictions as the Katz index while requires much less CPU time and memory space than the Katz index, which is therefore a strong candidate for potential practical applications in data mining of huge-size networks.},
}
12. Zhou, Tao and Lü, Linyuan and Zhang, Yi‐Cheng, 2009, Predicting missing links via local information: The European Physical Journal B.
DOI: 10.1140/epjb/e2009-00335-8
BibTeX
@article{doi101140epjbe2009003358,
  author     = {Zhou, Tao and Lü, Linyuan and Zhang, Yi-Cheng},
  title      = {Predicting missing links via local information},
  year       = {2009},
  journal    = {The European Physical Journal B},
  url        = {https://doi.org/10.1140/epjb/e2009-00335-8},
  doi        = {10.1140/epjb/e2009-00335-8},
  openalex   = {W2007444087},
  references = {doi101002asi20591},
}
13. Bizer, Christian and Heath, Tom and Berners‐Lee, Tim, 2009, Linked Data - The Story So Far: International Journal on Semantic Web and Information Systems.
Abstract
The term “Linked Data” refers to a set of best practices for publishing and connecting structured data on the Web. These best practices have been adopted by an increasing number of data providers over the last three years, leading to the creation of a global data space containing billions of assertions— the Web of Data. In this article, the authors present the concept and technical principles of Linked Data, and situate these within the broader context of related technological developments. They describe progress to date in publishing Linked Data on the Web, review applications that have been developed to exploit the Web of Data, and map out a research agenda for the Linked Data community as it moves forward.
BibTeX
@article{doi104018jswis2009081901,
  author   = {Bizer, Christian and Heath, Tom and Berners-Lee, Tim},
  title    = {Linked Data - The Story So Far},
  year     = {2009},
  journal  = {International Journal on Semantic Web and Information Systems},
  abstract = {The term “Linked Data” refers to a set of best practices for publishing and connecting structured data on the Web. These best practices have been adopted by an increasing number of data providers over the last three years, leading to the creation of a global data space containing billions of assertions— the Web of Data. In this article, the authors present the concept and technical principles of Linked Data, and situate these within the broader context of related technological developments. They describe progress to date in publishing Linked Data on the Web, review applications that have been developed to exploit the Web of Data, and map out a research agenda for the Linked Data community as it moves forward.},
  url      = {https://doi.org/10.4018/jswis.2009081901},
  doi      = {10.4018/jswis.2009081901},
  openalex = {W2015191210},
}
14. Leskovec, Jure and Huttenlocher, Daniel P. and Kleinberg, Jon, 2010, Predicting positive and negative links in online social networks.
Abstract
We study online social networks in which relationships can be either positive (indicating relations such as friendship) or negative (indicating relations such as opposition or antagonism). Such a mix of positive and negative links arise in a variety of online settings; we study datasets from Epinions, Slashdot and Wikipedia. We find that the signs of links in the underlying social networks can be predicted with high accuracy, using models that generalize across this diverse range of sites. These models provide insight into some of the fundamental principles that drive the formation of signed links in networks, shedding light on theories of balance and status from social psychology; they also suggest social computing applications by which the attitude of one user toward another can be estimated from evidence provided by their relationships with other members of the surrounding social network.
BibTeX
@article{doi10114517726901772756,
  author   = {Leskovec, Jure and Huttenlocher, Daniel P. and Kleinberg, Jon},
  title    = {Predicting positive and negative links in online social networks},
  year     = {2010},
  doi      = {10.1145/1772690.1772756},
  url      = {https://doi.org/10.1145/1772690.1772756},
  openalex = {W2073415627},
  abstract = {We study online social networks in which relationships can be either positive (indicating relations such as friendship) or negative (indicating relations such as opposition or antagonism). Such a mix of positive and negative links arise in a variety of online settings; we study datasets from Epinions, Slashdot and Wikipedia. We find that the signs of links in the underlying social networks can be predicted with high accuracy, using models that generalize across this diverse range of sites. These models provide insight into some of the fundamental principles that drive the formation of signed links in networks, shedding light on theories of balance and status from social psychology; they also suggest social computing applications by which the attitude of one user toward another can be estimated from evidence provided by their relationships with other members of the surrounding social network.},
}
15. Isele, Robert and Jentzsch, Anja and Bizer, Christian, 2010, Silk server - adding missing links while consuming linked data: MADOC (University of Mannheim).
Abstract
The Web of Linked Data is built upon the idea that data items on the Web are connected by RDF links. Sadly, the reality on the Web shows that Linked Data sources set some RDF links pointing at data items in related data sources, but they clearly do not set RDF links to all data sources that provide related data. In this paper, we present Silk Server, an identity resolution component, which can be used within Linked Data application architectures to augment Web data with additional RDF links. Silk Server is designed to be used with an incoming stream of RDF instances, produced for example by a Linked Data crawler. Silk Server matches the RDF descriptions of incoming instances against a local set of known instances and discovers missing links between them. Based on this assessment, an application can store data about newly discovered instances in its repository or fuse data that is already known about an entity with additional data about the entity from the Web. Afterwards, we report on the results of an experiment in which Silk Server was used to generate RDF links between authors and publications from the Semantic Web Dog Food Corpus and a stream of FOAF profiles that were crawled from the Web.
BibTeX
@article{openalexw2394691298,
  author     = {Isele, Robert and Jentzsch, Anja and Bizer, Christian},
  title      = {Silk server - adding missing links while consuming linked data},
  year       = {2010},
  journal    = {MADOC (University of Mannheim)},
  abstract   = {The Web of Linked Data is built upon the idea that data items on the Web are connected by RDF links. Sadly, the reality on the Web shows that Linked Data sources set some RDF links pointing at data items in related data sources, but they clearly do not set RDF links to all data sources that provide related data. In this paper, we present Silk Server, an identity resolution component, which can be used within Linked Data application architectures to augment Web data with additional RDF links. Silk Server is designed to be used with an incoming stream of RDF instances, produced for example by a Linked Data crawler. Silk Server matches the RDF descriptions of incoming instances against a local set of known instances and discovers missing links between them. Based on this assessment, an application can store data about newly discovered instances in its repository or fuse data that is already known about an entity with additional data about the entity from the Web. Afterwards, we report on the results of an experiment in which Silk Server was used to generate RDF links between authors and publications from the Semantic Web Dog Food Corpus and a stream of FOAF profiles that were crawled from the Web.},
  url        = {https://openalex.org/W2394691298},
  openalex   = {W2394691298},
  references = {doi101007978364204930941, doi101007s1322201000217, doi101016jwebsem200509001, doi101016jwebsem200802005, doi10114514566501456651, doi104018jswis2009081901, openalexw156172657, openalexw1577231857, openalexw2403886113, openalexw2611894836},
}
16. Fire, Michael and Tenenboim, Lena and Lesser, Ofrit and Puzis, Rami and Rokach, Lior and Elovici, Yuval, 2011, Link Prediction in Social Networks Using Computationally Efficient Topological Features.
DOI: 10.1109/passat/socialcom.2011.20
Abstract
Online social networking sites have become increasingly popular over the last few years. As a result, new interdisciplinary research directions have emerged in which social network analysis methods are applied to networks containing hundreds millions of users. Unfortunately, links between individuals may be missing due to imperfect acquirement processes or because they are not yet reflected in the online network (i.e., friends in real world did not form a virtual connection.) Existing link prediction techniques lack the scalability required for full application on a continuously growing social network which may be adding everyday users with thousands of connections. The primary bottleneck in link prediction techniques is extracting structural features required for classifying links. In this paper we propose a set of simple, easy-to-compute structural features that can be analyzed to identify missing links. We show that a machine learning classifier trained using the proposed simple structural features can successfully identify missing links even when applied to a hard problem of classifying links between individuals who have at least one common friend. A new friends measure that we developed is shown to be a good predictor for missing links and an evaluation experiment was performed on five large social networks datasets: Face book, Flickr, You Tube, Academia and The Marker. Our methods can provide social network site operators with the capability of helping users to find known, offline contacts and to discover new friends online. They may also be used for exposing hidden links in an online social network.
BibTeX
@inproceedings{doi101109passatsocialcom201120,
  author     = {Fire, Michael and Tenenboim, Lena and Lesser, Ofrit and Puzis, Rami and Rokach, Lior and Elovici, Yuval},
  title      = {Link Prediction in Social Networks Using Computationally Efficient Topological Features},
  booktitle  = {2011 {IEEE} Third International Conference on Privacy, Security, Risk and Trust and 2011 {IEEE} Third International Conference on Social Computing},
  year       = {2011},
  abstract   = {Online social networking sites have become increasingly popular over the last few years. As a result, new interdisciplinary research directions have emerged in which social network analysis methods are applied to networks containing hundreds of millions of users. Unfortunately, links between individuals may be missing due to imperfect acquirement processes or because they are not yet reflected in the online network (i.e., friends in real world did not form a virtual connection.) Existing link prediction techniques lack the scalability required for full application on a continuously growing social network which may be adding everyday users with thousands of connections. The primary bottleneck in link prediction techniques is extracting structural features required for classifying links. In this paper we propose a set of simple, easy-to-compute structural features that can be analyzed to identify missing links. We show that a machine learning classifier trained using the proposed simple structural features can successfully identify missing links even when applied to a hard problem of classifying links between individuals who have at least one common friend. A new friends measure that we developed is shown to be a good predictor for missing links and an evaluation experiment was performed on five large social networks datasets: Facebook, Flickr, YouTube, Academia and The Marker. Our methods can provide social network site operators with the capability of helping users to find known, offline contacts and to discover new friends online. They may also be used for exposing hidden links in an online social network.},
  url        = {https://doi.org/10.1109/passat/socialcom.2011.20},
  doi        = {10.1109/passat/socialcom.2011.20},
  openalex   = {W2542727820},
  references = {doi101002asi20591, doi101007bf02289026, doi10103830918, doi101126science2865439509, doi10114512983061298311, doi10114516562741656278, doi1025080tcwv9851, doi1041359781412973380n22, doi104324978008087809612, openalexw1565746575}
}
17. Han, Xianpei and Sun, Le and Zhao, Jun, 2011, Collective entity linking in web text.
Abstract
Entity Linking (EL) is the task of linking name mentions in Web text with their referent entities in a knowledge base. Traditional EL methods usually link name mentions in a document by assuming them to be independent. However, there is often additional interdependence between different EL decisions, i.e., the entities in the same document should be semantically related to each other. In these cases, Collective Entity Linking, in which the name mentions in the same document are linked jointly by exploiting the interdependence between them, can improve the entity linking accuracy.
BibTeX
@inproceedings{doi10114520099162010019,
  author     = {Han, Xianpei and Sun, Le and Zhao, Jun},
  title      = {Collective entity linking in web text},
  booktitle  = {Proceedings of the 34th International {ACM} {SIGIR} Conference on Research and Development in Information Retrieval},
  year       = {2011},
  abstract   = {Entity Linking (EL) is the task of linking name mentions in Web text with their referent entities in a knowledge base. Traditional EL methods usually link name mentions in a document by assuming them to be independent. However, there is often additional interdependence between different EL decisions, i.e., the entities in the same document should be semantically related to each other. In these cases, Collective Entity Linking, in which the name mentions in the same document are linked jointly by exploiting the interdependence between them, can improve the entity linking accuracy.},
  url        = {https://doi.org/10.1145/2009916.2010019},
  doi        = {10.1145/2009916.2010019},
  openalex   = {W2085337304},
  references = {doi10114511342711134284}
}
18. Ngonga Ngomo, Axel-Cyrille and Auer, Sören, 2011, LIMES -- A Time-Efficient Approach for Large-Scale Link Discovery on the Web of Data.
Abstract
The Linked Data paradigm has evolved into a powerful enabler for the transition from the document-oriented Web into the Semantic Web. While the amount of data published as Linked Data grows steadily and has surpassed 25 billion triples, less than 5 % of these triples are links between knowledge bases. Link discovery frameworks provide the functionality necessary to discover missing links between knowledge bases in a semi-automatic fashion. Yet, the task of linking knowledge bases requires a significant amount of time, especially when it is carried out on large data sets. This paper presents and evaluates LIMES -- a novel time-efficient approach for link discovery in metric spaces. Our approach utilizes the mathematical characteristics of metric spaces to compute estimates of the similarity between instances. These estimates are then used to filter out a large amount of those instance pairs that do not suffice the mapping conditions. Thus, LIMES can reduce the number of comparisons needed during the mapping process by several orders of magnitude. We present the mathematical foundation and the core algorithms employed in the implementation. We evaluate LIMES with synthetic data to elucidate its behavior on small and large data sets with different configurations and show that our approach can significantly reduce the time complexity of a mapping task. In addition, we compare the runtime of our framework with a state-of-the-art link discovery tool. We show that LIMES is more than 60 times faster when mapping large knowledge bases.
BibTeX
@inproceedings{openalexw192652968,
  author     = {Ngonga Ngomo, Axel-Cyrille and Auer, Sören},
  title      = {{LIMES} -- A Time-Efficient Approach for Large-Scale Link Discovery on the {Web} of {Data}},
  booktitle  = {Proceedings of the Twenty-Second International Joint Conference on Artificial Intelligence},
  year       = {2011},
  abstract   = {The Linked Data paradigm has evolved into a powerful enabler for the transition from the document-oriented Web into the Semantic Web. While the amount of data published as Linked Data grows steadily and has surpassed 25 billion triples, less than 5 \% of these triples are links between knowledge bases. Link discovery frameworks provide the functionality necessary to discover missing links between knowledge bases in a semi-automatic fashion. Yet, the task of linking knowledge bases requires a significant amount of time, especially when it is carried out on large data sets. This paper presents and evaluates LIMES -- a novel time-efficient approach for link discovery in metric spaces. Our approach utilizes the mathematical characteristics of metric spaces to compute estimates of the similarity between instances. These estimates are then used to filter out a large amount of those instance pairs that do not suffice the mapping conditions. Thus, LIMES can reduce the number of comparisons needed during the mapping process by several orders of magnitude. We present the mathematical foundation and the core algorithms employed in the implementation. We evaluate LIMES with synthetic data to elucidate its behavior on small and large data sets with different configurations and show that our approach can significantly reduce the time complexity of a mapping task. In addition, we compare the runtime of our framework with a state-of-the-art link discovery tool. We show that LIMES is more than 60 times faster when mapping large knowledge bases.},
  url        = {https://openalex.org/W192652968},
  openalex   = {W192652968},
  references = {doi101007978354076298052, doi101007978364204930941, doi101109tit2005844059, doi101126science1136800, doi10114514566501456651, doi104018jswis2009081901, openalexw1647671624, openalexw2171574281, openalexw2611200784, openalexw46452414}
}
19. Han, Xianpei and Sun, Le, 2011, A Generative Entity-Mention Model for Linking Entities with Knowledge Base.
Abstract
Linking entities with knowledge base (entity linking) is a key issue in bridging the textual data with the structural knowledge base. Due to the name variation problem and the name ambiguity problem, the entity linking decisions are critically depending on the heterogenous knowledge of entities. In this paper, we propose a generative probabilistic model, called entity-mention model, which can leverage heterogenous entity knowledge (including popularity knowledge, name knowledge and context knowledge) for the entity linking task. In our model, each name mention to be linked is modeled as a sample generated through a three-step generative story, and the entity knowledge is encoded in the distribution of entities in document P(e), the distribution of possible names of a specific entity P(s|e), and the distribution of possible contexts of a specific entity P(c|e). To find the referent entity of a name mention, our method combines the evidences from all the three distributions P(e), P(s|e) and P(c|e). Experimental results show that our method can significantly outperform the traditional methods.
BibTeX
@inproceedings{openalexw2162638401,
  author     = {Han, Xianpei and Sun, Le},
  title      = {A Generative Entity-Mention Model for Linking Entities with Knowledge Base},
  booktitle  = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},
  year       = {2011},
  abstract   = {Linking entities with knowledge base (entity linking) is a key issue in bridging the textual data with the structural knowledge base. Due to the name variation problem and the name ambiguity problem, the entity linking decisions are critically depending on the heterogenous knowledge of entities. In this paper, we propose a generative probabilistic model, called entity-mention model, which can leverage heterogenous entity knowledge (including popularity knowledge, name knowledge and context knowledge) for the entity linking task. In our model, each name mention to be linked is modeled as a sample generated through a three-step generative story, and the entity knowledge is encoded in the distribution of entities in document P(e), the distribution of possible names of a specific entity P(s|e), and the distribution of possible contexts of a specific entity P(c|e). To find the referent entity of a name mention, our method combines the evidences from all the three distributions P(e), P(s|e) and P(c|e). Experimental results show that our method can significantly outperform the traditional methods.},
  openalex   = {W2162638401},
  references = {doi10114511342711134284}
}
20. Schultz, Andreas and Matteini, Andrea and Isele, Robert and Bizer, Christian and Becker, Christian, 2011, LDIF - Linked Data Integration Framework: MADOC (University of Mannheim).
Abstract
The LDIF - Linked Data Integration Framework can be used within Linked Data applications to translate heterogeneous data from the Web of Linked Data into a clean local target representation while keeping track of data provenance. LDIF provides an expressive mapping language for translating data from the various vocabularies that are used on the Web into a consistent, local target vocabulary. LDIF includes an identity resolution component which discovers URI aliases in the input data and replaces them with a single target URI based on user-provided matching heuristics. For provenance tracking, the LDIF framework employs the Named Graphs data model. This paper describes the architecture of the LDIF framework and presents a performance evaluation of a life science use case.
BibTeX
@article{openalexw2405571848,
  author     = {Schultz, Andreas and Matteini, Andrea and Isele, Robert and Bizer, Christian and Becker, Christian},
  title      = {{LDIF} - {Linked Data} Integration Framework},
  year       = {2011},
  journal    = {MADOC (University of Mannheim)},
  abstract   = {The LDIF - Linked Data Integration Framework can be used within Linked Data applications to translate heterogeneous data from the Web of Linked Data into a clean local target representation while keeping track of data provenance. LDIF provides an expressive mapping language for translating data from the various vocabularies that are used on the Web into a consistent, local target vocabulary. LDIF includes an identity resolution component which discovers URI aliases in the input data and replaces them with a single target URI based on user-provided matching heuristics. For provenance tracking, the LDIF framework employs the Named Graphs data model. This paper describes the architecture of the LDIF framework and presents a performance evaluation of a life science use case.},
  openalex   = {W2405571848},
  references = {openalexw2394691298}
}
21. Fire, Michael and Tenenboim-Chekina, Lena and Puzis, Rami and Lesser, Ofrit and Rokach, Lior and Elovici, Yuval, 2013, Computationally efficient link prediction in a variety of social networks: ACM Transactions on Intelligent Systems and Technology.
Abstract
Online social networking sites have become increasingly popular over the last few years. As a result, new interdisciplinary research directions have emerged in which social network analysis methods are applied to networks containing hundreds of millions of users. Unfortunately, links between individuals may be missing either due to an imperfect acquirement process or because they are not yet reflected in the online network (i.e., friends in the real world did not form a virtual connection). The primary bottleneck in link prediction techniques is extracting the structural features required for classifying links. In this article, we propose a set of simple, easy-to-compute structural features that can be analyzed to identify missing links. We show that by using simple structural features, a machine learning classifier can successfully identify missing links, even when applied to a predicament of classifying links between individuals with at least one common friend. We also present a method for calculating the amount of data needed in order to build more accurate classifiers. The new Friends measure and Same community features we developed are shown to be good predictors for missing links. An evaluation experiment was performed on ten large social networks datasets: Academia.edu, DBLP, Facebook, Flickr, Flixster, Google+, Gowalla, TheMarker, Twitter, and YouTube. Our methods can provide social network site operators with the capability of helping users to find known, offline contacts and to discover new friends online. They may also be used for exposing hidden links in online social networks.
BibTeX
@article{doi10114525421822542192,
  author     = {Fire, Michael and Tenenboim-Chekina, Lena and Puzis, Rami and Lesser, Ofrit and Rokach, Lior and Elovici, Yuval},
  title      = {Computationally efficient link prediction in a variety of social networks},
  year       = {2013},
  journal    = {ACM Transactions on Intelligent Systems and Technology},
  abstract   = {Online social networking sites have become increasingly popular over the last few years. As a result, new interdisciplinary research directions have emerged in which social network analysis methods are applied to networks containing hundreds of millions of users. Unfortunately, links between individuals may be missing either due to an imperfect acquirement process or because they are not yet reflected in the online network (i.e., friends in the real world did not form a virtual connection). The primary bottleneck in link prediction techniques is extracting the structural features required for classifying links. In this article, we propose a set of simple, easy-to-compute structural features that can be analyzed to identify missing links. We show that by using simple structural features, a machine learning classifier can successfully identify missing links, even when applied to a predicament of classifying links between individuals with at least one common friend. We also present a method for calculating the amount of data needed in order to build more accurate classifiers. The new Friends measure and Same community features we developed are shown to be good predictors for missing links. An evaluation experiment was performed on ten large social networks datasets: Academia.edu, DBLP, Facebook, Flickr, Flixster, Google+, Gowalla, TheMarker, Twitter, and YouTube. Our methods can provide social network site operators with the capability of helping users to find known, offline contacts and to discover new friends online. They may also be used for exposing hidden links in online social networks.},
  url        = {https://doi.org/10.1145/2542182.2542192},
  doi        = {10.1145/2542182.2542192},
  openalex   = {W2069186598},
  references = {doi101007bf02289026, doi10103830918, doi101073pnas122653799, doi10108817425468200810p10008, doi101109passatsocialcom201120, doi101126science2865439509, doi10114512983061298311, doi10114516562741656278, doi101609icwsmv4i114033, doi1025080tcwv9851, doi104324978008087809612}
}
22. Gao, Fei and Musiał, Katarzyna and Cooper, Colin and Tsoka, Sophia, 2015, Link Prediction Methods and Their Accuracy for Different Social Networks and Network Metrics: Scientific Programming.
Abstract
Currently, we are experiencing a rapid growth of the number of social-based online systems. The availability of the vast amounts of data gathered in those systems brings new challenges that we face when trying to analyse it. One of the intensively researched topics is the prediction of social connections between users. Although a lot of effort has been made to develop new prediction approaches, the existing methods are not comprehensively analysed. In this paper we investigate the correlation between network metrics and accuracy of different prediction methods. We selected six time-stamped real-world social networks and ten most widely used link prediction methods. The results of the experiments show that the performance of some methods has a strong correlation with certain network metrics. We managed to distinguish “prediction friendly” networks, for which most of the prediction methods give good performance, as well as “prediction unfriendly” networks, for which most of the methods result in high prediction error. Correlation analysis between network metrics and prediction accuracy of prediction methods may form the basis of a metalearning system where based on network characteristics it will be able to recommend the right prediction method for a given network.
BibTeX
@article{doi1011552015172879,
  author     = {Gao, Fei and Musiał, Katarzyna and Cooper, Colin and Tsoka, Sophia},
  title      = {Link Prediction Methods and Their Accuracy for Different Social Networks and Network Metrics},
  year       = {2015},
  journal    = {Scientific Programming},
  abstract   = {Currently, we are experiencing a rapid growth of the number of social-based online systems. The availability of the vast amounts of data gathered in those systems brings new challenges that we face when trying to analyse it. One of the intensively researched topics is the prediction of social connections between users. Although a lot of effort has been made to develop new prediction approaches, the existing methods are not comprehensively analysed. In this paper we investigate the correlation between network metrics and accuracy of different prediction methods. We selected six time-stamped real-world social networks and ten most widely used link prediction methods. The results of the experiments show that the performance of some methods has a strong correlation with certain network metrics. We managed to distinguish “prediction friendly” networks, for which most of the prediction methods give good performance, as well as “prediction unfriendly” networks, for which most of the methods result in high prediction error. Correlation analysis between network metrics and prediction accuracy of prediction methods may form the basis of a metalearning system where based on network characteristics it will be able to recommend the right prediction method for a given network.},
  url        = {https://doi.org/10.1155/2015/172879},
  doi        = {10.1155/2015/172879},
  openalex   = {W1482500327},
  references = {doi101016jphysrep200510009, doi10103830918, doi10103835075138, doi101090cbms06206, doi101103physrevlett863200, doi101109passatsocialcom201120, doi101126science2865439509, doi101137s003614450342480, doi101148radiology14317063747, openalexw1981745143, openalexw2799004609}
}
23. Nentwig, Markus and Hartung, Michael and Ngonga Ngomo, Axel-Cyrille and Rahm, Erhard, 2016, A survey of current Link Discovery frameworks: Semantic Web.
Abstract
Links build the backbone of the Linked Data Cloud. With the steady growth in size of datasets comes an increased need for end users to know which frameworks to use for deriving links between datasets. In this survey, we comparatively evaluate current Link Discovery tools and frameworks. For this purpose, we outline general requirements and derive a generic architecture of Link Discovery frameworks. Based on this generic architecture, we study and compare the features of state-of-the-art linking frameworks. We also analyze reported performance evaluations for the different frameworks. Finally, we derive insights pertaining to possible future developments in the domain of Link Discovery.
BibTeX
@article{doi103233sw150210,
  author     = {Nentwig, Markus and Hartung, Michael and Ngonga Ngomo, Axel-Cyrille and Rahm, Erhard},
  title      = {A survey of current {Link Discovery} frameworks},
  year       = {2016},
  journal    = {Semantic Web},
  abstract   = {Links build the backbone of the Linked Data Cloud. With the steady growth in size of datasets comes an increased need for end users to know which frameworks to use for deriving links between datasets. In this survey, we comparatively evaluate current Link Discovery tools and frameworks. For this purpose, we outline general requirements and derive a generic architecture of Link Discovery frameworks. Based on this generic architecture, we study and compare the features of state-of-the-art linking frameworks. We also analyze reported performance evaluations for the different frameworks. Finally, we derive insights pertaining to possible future developments in the domain of Link Discovery.},
  url        = {https://doi.org/10.3233/sw-150210},
  doi        = {10.3233/sw-150210},
  openalex   = {W2239873446},
  references = {openalexw192652968, openalexw2394691298}
}
24. Yasami, Yasser and Safaei, Farshad, 2017, A novel multilayer model for missing link prediction and future link forecasting in dynamic complex networks: Physica A Statistical Mechanics and its Applications.
DOI: 10.1016/j.physa.2017.11.134
BibTeX
@article{doi101016jphysa201711134,
  author     = {Yasami, Yasser and Safaei, Farshad},
  title      = {A novel multilayer model for missing link prediction and future link forecasting in dynamic complex networks},
  year       = {2017},
  journal    = {Physica A: Statistical Mechanics and its Applications},
  url        = {https://doi.org/10.1016/j.physa.2017.11.134},
  doi        = {10.1016/j.physa.2017.11.134},
  openalex   = {W2772410093},
  references = {doi101007s1327801301428}
}
25. Martinčić-Ipšić, Sanda and Močibob, Edvin and Perc, Matjaž, 2017, Link prediction on Twitter: PLoS ONE.
DOI: 10.1371/journal.pone.0181079
Abstract
With over 300 million active users, Twitter is among the largest online news and social networking services in existence today. Open access to information on Twitter makes it a valuable source of data for research on social interactions, sentiment analysis, content diffusion, link prediction, and the dynamics behind human collective behaviour in general. Here we use Twitter data to construct co-occurrence language networks based on hashtags and based on all the words in tweets, and we use these networks to study link prediction by means of different methods and evaluation metrics. In addition to using five known methods, we propose two effective weighted similarity measures, and we compare the obtained outcomes in dependence on the selected semantic context of topics on Twitter. We find that hashtag networks yield to a large degree equal results as all-word networks, thus supporting the claim that hashtags alone robustly capture the semantic context of tweets, and as such are useful and suitable for studying the content and categorization. We also introduce ranking diagrams as an efficient tool for the comparison of the performance of different link prediction algorithms across multiple datasets. Our research indicates that successful link prediction algorithms work well in correctly foretelling highly probable links even if the information about a network structure is incomplete, and they do so even if the semantic context is rationalized to hashtags.
BibTeX
@article{doi101371journalpone0181079,
  author     = {Martinčić-Ipšić, Sanda and Močibob, Edvin and Perc, Matjaž},
  title      = {Link prediction on {Twitter}},
  year       = {2017},
  journal    = {PLoS ONE},
  abstract   = {With over 300 million active users, Twitter is among the largest online news and social networking services in existence today. Open access to information on Twitter makes it a valuable source of data for research on social interactions, sentiment analysis, content diffusion, link prediction, and the dynamics behind human collective behaviour in general. Here we use Twitter data to construct co-occurrence language networks based on hashtags and based on all the words in tweets, and we use these networks to study link prediction by means of different methods and evaluation metrics. In addition to using five known methods, we propose two effective weighted similarity measures, and we compare the obtained outcomes in dependence on the selected semantic context of topics on Twitter. We find that hashtag networks yield to a large degree equal results as all-word networks, thus supporting the claim that hashtags alone robustly capture the semantic context of tweets, and as such are useful and suitable for studying the content and categorization. We also introduce ranking diagrams as an efficient tool for the comparison of the performance of different link prediction algorithms across multiple datasets. Our research indicates that successful link prediction algorithms work well in correctly foretelling highly probable links even if the information about a network structure is incomplete, and they do so even if the semantic context is rationalized to hashtags.},
  url        = {https://doi.org/10.1371/journal.pone.0181079},
  doi        = {10.1371/journal.pone.0181079},
  openalex   = {W2737056279},
  references = {doi101007s1327801301428}
}
26. De Winter, Sam and Decuypere, Tim and Mitrović, Sandra and Baesens, Bart and De Weerdt, Jochen, 2018, Combining Temporal Aspects of Dynamic Networks with Node2Vec for a more Efficient Dynamic Link Prediction.
DOI: 10.1109/asonam.2018.8508272
Abstract
In many real-life applications it is crucial to be able to, given a collection of link states of a network in a certain time period, accurately predict the link state of the network at a future time. This is known as dynamic link prediction, which compared to its static counterpart is more complex, as capturing the temporal characteristics is a non-trivial task. This explains while still majority of today's research in network representation learning focuses on static setting ignoring temporal information. In this work, we focus on one such case and aim at extending node2vec, representation learning method successfully applied for static link prediction, to a dynamic setup. This extended method is applied and validated on several real-life networks with different properties. Results show that taking into account dynamic aspect outperforms static approach. Additionally, based on the network properties, recommendations are given for the node2vec parameters.
BibTeX
@inproceedings{doi101109asonam20188508272,
  author     = {De Winter, Sam and Decuypere, Tim and Mitrović, Sandra and Baesens, Bart and De Weerdt, Jochen},
  title      = {Combining Temporal Aspects of Dynamic Networks with {Node2Vec} for a more Efficient Dynamic Link Prediction},
  booktitle  = {2018 {IEEE/ACM} International Conference on Advances in Social Networks Analysis and Mining ({ASONAM})},
  year       = {2018},
  abstract   = {In many real-life applications it is crucial to be able to, given a collection of link states of a network in a certain time period, accurately predict the link state of the network at a future time. This is known as dynamic link prediction, which compared to its static counterpart is more complex, as capturing the temporal characteristics is a non-trivial task. This explains while still majority of today's research in network representation learning focuses on static setting ignoring temporal information. In this work, we focus on one such case and aim at extending node2vec, representation learning method successfully applied for static link prediction, to a dynamic setup. This extended method is applied and validated on several real-life networks with different properties. Results show that taking into account dynamic aspect outperforms static approach. Additionally, based on the network properties, recommendations are given for the node2vec parameters.},
  url        = {https://doi.org/10.1109/asonam.2018.8508272},
  doi        = {10.1109/asonam.2018.8508272},
  openalex   = {W2898512177},
  references = {doi101007s1046201795902}
}
27. Fu, Chenbo and Zhao, Minghao and Fan, Lu and Chen, Xinyi and Chen, Jinyin and Wu, Zhefu and Xia, Yongxiang and Xuan, Qi, 2018, Link Weight Prediction Using Supervised Learning Methods and Its Application to Yelp Layered Network: IEEE Transactions on Knowledge and Data Engineering.
DOI: 10.1109/tkde.2018.2801854
Abstract
Real-world networks feature weights of interactions, where link weights often represent some physical attributes. In many situations, to recover the missing data or predict the network evolution, we need to predict link weights in a network. In this paper, we first proposed a series of new centrality indices for links in line graph. Then, utilizing these line graph indices, as well as a number of original graph indices, we designed three supervised learning methods to realize link weight prediction both in the networks of single layer and multiple layers, which perform much better than several recently proposed baseline methods. We found that the resource allocation index (RA) plays a more important role in the weight prediction than other topological properties, and the line graph indices are at least as important as the original graph indices in link weight prediction. In particular, the success application of our methods on Yelp layered network suggests that we can indeed predict the offline co-foraging behaviors of users just based on their online social interactions, which may open a new direction for link weight prediction algorithms, and meanwhile provide insights to design better restaurant recommendation systems.
BibTeX
@article{doi101109tkde20182801854,
  author     = {Fu, Chenbo and Zhao, Minghao and Fan, Lu and Chen, Xinyi and Chen, Jinyin and Wu, Zhefu and Xia, Yongxiang and Xuan, Qi},
  title      = {Link Weight Prediction Using Supervised Learning Methods and Its Application to {Yelp} Layered Network},
  journal    = {IEEE Transactions on Knowledge and Data Engineering},
  year       = {2018},
  doi        = {10.1109/tkde.2018.2801854},
  url        = {https://doi.org/10.1109/tkde.2018.2801854},
  openalex   = {W2789443491},
  references = {doi1010160378873378900217, doi101023a1010933404324, doi10103830918, doi101073pnas0507655102, doi101073pnas122653799, doi101109passatsocialcom201120, doi101126science2865439509, doi101137s003614450342480, doi101214aos1013203451, openalexw1673310716, openalexw1854214752},
  abstract   = {Real-world networks feature weights of interactions, where link weights often represent some physical attributes. In many situations, to recover the missing data or predict the network evolution, we need to predict link weights in a network. In this paper, we first proposed a series of new centrality indices for links in line graph. Then, utilizing these line graph indices, as well as a number of original graph indices, we designed three supervised learning methods to realize link weight prediction both in the networks of single layer and multiple layers, which perform much better than several recently proposed baseline methods. We found that the resource allocation index (RA) plays a more important role in the weight prediction than other topological properties, and the line graph indices are at least as important as the original graph indices in link weight prediction. In particular, the success application of our methods on Yelp layered network suggests that we can indeed predict the offline co-foraging behaviors of users just based on their online social interactions, which may open a new direction for link weight prediction algorithms, and meanwhile provide insights to design better restaurant recommendation systems.}
}
28. Chen, Jinyin and Zhang, Jian and Xu, Xuanheng and Fu, Chenbo and Zhang, Dan and Zhang, Qingpeng and Xuan, Qi, 2019, E-LSTM-D: A Deep Learning Framework for Dynamic Network Link Prediction: IEEE Transactions on Systems, Man, and Cybernetics: Systems.
DOI: 10.1109/tsmc.2019.2932913
Abstract
Predicting the potential relations between nodes in networks, known as link prediction, has long been a challenge in network science. However, most studies just focused on link prediction of static network, while real-world networks always evolve over time with the occurrence and vanishing of nodes and links. Dynamic network link prediction (DNLP) thus has been attracting more and more attention since it can better capture the evolution nature of networks, but still most algorithms fail to achieve satisfied prediction accuracy. Motivated by the excellent performance of long short-term memory (LSTM) in processing time series, in this article, we propose a novel encoder-LSTM-decoder (E-LSTM-D) deep learning model to predict dynamic links end to end. It could handle long-term prediction problems, and suits the networks of different scales with fine-tuned structure. To the best of our knowledge, it is the first time that LSTM, together with an encoder-decoder architecture, is applied to link prediction in dynamic networks. This new model is able to automatically learn structural and temporal features in a unified framework, which can predict the links that never appear in the network before. The extensive experiments show that our E-LSTM-D model significantly outperforms newly proposed DNLP methods and obtain the state-of-the-art results.
BibTeX
@article{doi101109tsmc20192932913,
  author     = {Chen, Jinyin and Zhang, Jian and Xu, Xuanheng and Fu, Chenbo and Zhang, Dan and Zhang, Qingpeng and Xuan, Qi},
  title      = {{E-LSTM-D}: A Deep Learning Framework for Dynamic Network Link Prediction},
  journal    = {IEEE Transactions on Systems, Man, and Cybernetics: Systems},
  year       = {2019},
  doi        = {10.1109/tsmc.2019.2932913},
  url        = {https://doi.org/10.1109/tsmc.2019.2932913},
  openalex   = {W2969227524},
  references = {doi101109tkde20182801854},
  abstract   = {Predicting the potential relations between nodes in networks, known as link prediction, has long been a challenge in network science. However, most studies just focused on link prediction of static network, while real-world networks always evolve over time with the occurrence and vanishing of nodes and links. Dynamic network link prediction (DNLP) thus has been attracting more and more attention since it can better capture the evolution nature of networks, but still most algorithms fail to achieve satisfied prediction accuracy. Motivated by the excellent performance of long short-term memory (LSTM) in processing time series, in this article, we propose a novel encoder-LSTM-decoder (E-LSTM-D) deep learning model to predict dynamic links end to end. It could handle long-term prediction problems, and suits the networks of different scales with fine-tuned structure. To the best of our knowledge, it is the first time that LSTM, together with an encoder-decoder architecture, is applied to link prediction in dynamic networks. This new model is able to automatically learn structural and temporal features in a unified framework, which can predict the links that never appear in the network before. The extensive experiments show that our E-LSTM-D model significantly outperforms newly proposed DNLP methods and obtain the state-of-the-art results.}
}
29. Makarov, Ilya and Gerasimova, Olga and Sulimov, Pavel and Zhukov, Leonid, 2019, Dual network embedding for representing research interests in the link prediction problem on co-authorship networks: PeerJ Computer Science.
Abstract
We present a study on co-authorship network representation based on network embedding together with additional information on topic modeling of research papers and new edge embedding operator. We use the link prediction (LP) model for constructing a recommender system for searching collaborators with similar research interests. Extracting topics for each paper, we construct keywords co-occurrence network and use its embedding for further generalizing author attributes. Standard graph feature engineering and network embedding methods were combined for constructing co-author recommender system formulated as LP problem and prediction of future graph structure. We evaluate our survey on the dataset containing temporal information on National Research University Higher School of Economics over 25 years of research articles indexed in Russian Science Citation Index and Scopus. Our model of network representation shows better performance for stated binary classification tasks on several co-authorship networks.
BibTeX
@article{doi107717peerjcs172,
  author     = {Makarov, Ilya and Gerasimova, Olga and Sulimov, Pavel and Zhukov, Leonid},
  title      = {Dual network embedding for representing research interests in the link prediction problem on co-authorship networks},
  journal    = {PeerJ Computer Science},
  year       = {2019},
  doi        = {10.7717/peerj-cs.172},
  url        = {https://doi.org/10.7717/peerj-cs.172},
  openalex   = {W2910026207},
  references = {doi1011552015172879},
  abstract   = {We present a study on co-authorship network representation based on network embedding together with additional information on topic modeling of research papers and new edge embedding operator. We use the link prediction (LP) model for constructing a recommender system for searching collaborators with similar research interests. Extracting topics for each paper, we construct keywords co-occurrence network and use its embedding for further generalizing author attributes. Standard graph feature engineering and network embedding methods were combined for constructing co-author recommender system formulated as LP problem and prediction of future graph structure. We evaluate our survey on the dataset containing temporal information on National Research University Higher School of Economics over 25 years of research articles indexed in Russian Science Citation Index and Scopus. Our model of network representation shows better performance for stated binary classification tasks on several co-authorship networks.}
}
30. Kumari, Anisha and Behera, Ranjan Kumar and Sahoo, Kshira Sagar and Nayyar, Anand and Luhach, Ashish Kr. and Sahoo, Satya Prakash, 2020, Supervised link prediction using structured‐based feature extraction in social network: Concurrency and Computation: Practice and Experience.
Abstract
Social network analysis (SNA) has attracted a lot of attention in several domains in the past decades. It can be of 2‐folds: one is content‐based, and another one is structured‐based analysis. Link prediction is one of the emerging research problems, which comes under structured‐based analysis that deals with predicting the missing link, which is likely to appear in the future. In this article, the supervised machine learning techniques have been implemented to predict the possibilities of establishing the links in future. The major contribution in this article lies in feature construction from the topological structure of the network. Several structured‐based similarity measures have been considered for preparing the feature vector for each nonexisting links in the network. The performance of the proposed algorithm has been extensively validated by comparing with other link prediction algorithms using both real‐world and synthetic data sets.
BibTeX
@article{doi101002cpe5839,
  author     = {Kumari, Anisha and Behera, Ranjan Kumar and Sahoo, Kshira Sagar and Nayyar, Anand and Luhach, Ashish Kr. and Sahoo, Satya Prakash},
  title      = {Supervised link prediction using structured-based feature extraction in social network},
  journal    = {Concurrency and Computation: Practice and Experience},
  year       = {2020},
  doi        = {10.1002/cpe.5839},
  url        = {https://doi.org/10.1002/cpe.5839},
  openalex   = {W3040126871},
  references = {doi101002asi20591, doi101007s002650030651y, doi101007s1046201795902, doi101016jphysa201011027, doi101016jscs201712022, doi101016s0378873303000091, doi101103physreve80046122, doi101103physrevlett843201, doi101109passatsocialcom201120, doi101126science28754612115a, doi10114510653851065415, doi10114530979833098069, doi1011552015172879},
  abstract   = {Social network analysis (SNA) has attracted a lot of attention in several domains in the past decades. It can be of 2-folds: one is content-based, and another one is structured-based analysis. Link prediction is one of the emerging research problems, which comes under structured-based analysis that deals with predicting the missing link, which is likely to appear in the future. In this article, the supervised machine learning techniques have been implemented to predict the possibilities of establishing the links in future. The major contribution in this article lies in feature construction from the topological structure of the network. Several structured-based similarity measures have been considered for preparing the feature vector for each nonexisting links in the network. The performance of the proposed algorithm has been extensively validated by comparing with other link prediction algorithms using both real-world and synthetic data sets.}
}
31. Ghasemian, Amir and Hosseinmardi, Homa and Galstyan, Aram and Airoldi, Edoardo M. and Clauset, Aaron, 2020, Stacking models for nearly optimal link prediction in complex networks: Proceedings of the National Academy of Sciences.
Abstract
Most real-world networks are incompletely observed. Algorithms that can accurately predict which links are missing can dramatically speed up network data collection and improve network model validation. Many algorithms now exist for predicting missing links, given a partially observed network, but it has remained unknown whether a single best predictor exists, how link predictability varies across methods and networks from different domains, and how close to optimality current methods are. We answer these questions by systematically evaluating 203 individual link predictor algorithms, representing three popular families of methods, applied to a large corpus of 550 structurally diverse networks from six scientific domains. We first show that individual algorithms exhibit a broad diversity of prediction errors, such that no one predictor or family is best, or worst, across all realistic inputs. We then exploit this diversity using network-based metalearning to construct a series of "stacked" models that combine predictors into a single algorithm. Applied to a broad range of synthetic networks, for which we may analytically calculate optimal performance, these stacked models achieve optimal or nearly optimal levels of accuracy. Applied to real-world networks, stacked models are superior, but their accuracy varies strongly by domain, suggesting that link prediction may be fundamentally easier in social networks than in biological or technological networks. These results indicate that the state of the art for link prediction comes from combining individual algorithms, which can achieve nearly optimal predictions. We close with a brief discussion of limitations and opportunities for further improvements.
BibTeX
@article{doi101073pnas1914950117,
  author     = {Ghasemian, Amir and Hosseinmardi, Homa and Galstyan, Aram and Airoldi, Edoardo M. and Clauset, Aaron},
  title      = {Stacking models for nearly optimal link prediction in complex networks},
  journal    = {Proceedings of the National Academy of Sciences},
  year       = {2020},
  doi        = {10.1073/pnas.1914950117},
  url        = {https://doi.org/10.1073/pnas.1914950117},
  openalex   = {W3083421733},
  references = {doi10114525421822542192},
  abstract   = {Most real-world networks are incompletely observed. Algorithms that can accurately predict which links are missing can dramatically speed up network data collection and improve network model validation. Many algorithms now exist for predicting missing links, given a partially observed network, but it has remained unknown whether a single best predictor exists, how link predictability varies across methods and networks from different domains, and how close to optimality current methods are. We answer these questions by systematically evaluating 203 individual link predictor algorithms, representing three popular families of methods, applied to a large corpus of 550 structurally diverse networks from six scientific domains. We first show that individual algorithms exhibit a broad diversity of prediction errors, such that no one predictor or family is best, or worst, across all realistic inputs. We then exploit this diversity using network-based metalearning to construct a series of "stacked" models that combine predictors into a single algorithm. Applied to a broad range of synthetic networks, for which we may analytically calculate optimal performance, these stacked models achieve optimal or nearly optimal levels of accuracy. Applied to real-world networks, stacked models are superior, but their accuracy varies strongly by domain, suggesting that link prediction may be fundamentally easier in social networks than in biological or technological networks. These results indicate that the state of the art for link prediction comes from combining individual algorithms, which can achieve nearly optimal predictions. We close with a brief discussion of limitations and opportunities for further improvements.}
}
32. Wang, Lei and Ren, Jing and Xu, Bo and Li, Jianxin and Luo, Wei and Xia, Feng, 2020, MODEL: Motif-Based Deep Feature Learning for Link Prediction: IEEE Transactions on Computational Social Systems.
DOI: 10.1109/tcss.2019.2962819
Abstract
Link prediction plays an important role in network analysis and applications. Recently, approaches for link prediction have evolved from traditional similarity-based algorithms into embedding-based algorithms. However, most existing approaches fail to exploit the fact that real-world networks are different from random networks. In particular, real-world networks are known to contain motifs, natural network building blocks reflecting the underlying network-generating processes. In this article, we propose a novel embedding algorithm that incorporates network motifs to capture higher order structures in the network. To evaluate its effectiveness for link prediction, experiments were conducted on three types of networks: social networks, biological networks, and academic networks. The results demonstrate that our algorithm outperforms both the traditional similarity-based algorithms (by 20%) and the state-of-the-art embedding-based algorithms (by 19%).
BibTeX
@article{doi101109tcss20192962819,
  author     = {Wang, Lei and Ren, Jing and Xu, Bo and Li, Jianxin and Luo, Wei and Xia, Feng},
  title      = {{MODEL}: Motif-Based Deep Feature Learning for Link Prediction},
  journal    = {IEEE Transactions on Computational Social Systems},
  year       = {2020},
  doi        = {10.1109/tcss.2019.2962819},
  url        = {https://doi.org/10.1109/tcss.2019.2962819},
  openalex   = {W3006318668},
  references = {doi101109tkde20182801854},
  abstract   = {Link prediction plays an important role in network analysis and applications. Recently, approaches for link prediction have evolved from traditional similarity-based algorithms into embedding-based algorithms. However, most existing approaches fail to exploit the fact that real-world networks are different from random networks. In particular, real-world networks are known to contain motifs, natural network building blocks reflecting the underlying network-generating processes. In this article, we propose a novel embedding algorithm that incorporates network motifs to capture higher order structures in the network. To evaluate its effectiveness for link prediction, experiments were conducted on three types of networks: social networks, biological networks, and academic networks. The results demonstrate that our algorithm outperforms both the traditional similarity-based algorithms (by 20\%) and the state-of-the-art embedding-based algorithms (by 19\%).}
}
33. Zhao, Zhen and Zlokovic, Berislav V, 2021, Acetylated tau: A missing link between head injury and dementia: Med (New York, N.Y.).
DOI: 10.1016/j.medj.2021.05.005 Source
Abstract
Head injury is the most significant environmental risk factor for Alzheimer's disease and dementia. Yet, our understanding of its pathophysiology remains limited, and therapeutic targets remain elusive. In a recent issue of Cell, Shin et al. discovered a promising molecular pathway linking head injury to Alzheimer's disease.
BibTeX
@article{doi101016jmedj202105005,
  author     = {Zhao, Zhen and Zlokovic, Berislav V},
  title      = {Acetylated tau: A missing link between head injury and dementia},
  journal    = {Med (New York, N.Y.)},
  year       = {2021},
  doi        = {10.1016/j.medj.2021.05.005},
  url        = {https://pubmed.ncbi.nlm.nih.gov/35590136/},
  openalex   = {W3168388499},
  pmid       = {35590136},
  references = {doi101016jcell202010029, doi101016jcell202103032, doi101016jneuron201008044, doi101016jneuron201305005, doi101038ncb1747, doi101038nrn2808, doi101038nrneurol2016127, doi101152physrev000502017, doi101186alzrt259, doi101186s40478021011787},
  abstract   = {Head injury is the most significant environmental risk factor for Alzheimer's disease and dementia. Yet, our understanding of its pathophysiology remains limited, and therapeutic targets remain elusive. In a recent issue of Cell, Shin et al. discovered a promising molecular pathway linking head injury to Alzheimer's disease.}
}
34. Han, Jaewoong and Jeon, Byeongmin and Geum, Youngjung, 2022, Link Prediction Revisited: New Approach for Anticipating New Innovation Chances Using Technology Convergence: IEEE Transactions on Engineering Management.
Abstract
Link prediction has been actively used to anticipate future technology convergence. However, previous related studies have focused on anticipating within the current network only and fail to predict convergence with possible innovation chances. Hence, a new method is suggested herein for anticipating new innovation chances, where a method for adding innovation signals to the current network is suggested, and link prediction is conducted to the revised network. This article comprises three sections: current network construction, innovation signal detection, and link prediction to the revised network. The suggested framework is applied to mobile financial services as an illustrative example. This article is expected to serve as a systematic method for investigating innovation chances and anticipating possible associations within innovation networks.
BibTeX
@article{doi101109tem20223213867,
  author     = {Han, Jaewoong and Jeon, Byeongmin and Geum, Youngjung},
  title      = {Link Prediction Revisited: New Approach for Anticipating New Innovation Chances Using Technology Convergence},
  journal    = {IEEE Transactions on Engineering Management},
  year       = {2022},
  doi        = {10.1109/tem.2022.3213867},
  url        = {https://doi.org/10.1109/tem.2022.3213867},
  openalex   = {W4312997576},
  references = {doi101007s1046201795902},
  abstract   = {Link prediction has been actively used to anticipate future technology convergence. However, previous related studies have focused on anticipating within the current network only and fail to predict convergence with possible innovation chances. Hence, a new method is suggested herein for anticipating new innovation chances, where a method for adding innovation signals to the current network is suggested, and link prediction is conducted to the revised network. This article comprises three sections: current network construction, innovation signal detection, and link prediction to the revised network. The suggested framework is applied to mobile financial services as an illustrative example. This article is expected to serve as a systematic method for investigating innovation chances and anticipating possible associations within innovation networks.}
}
35. Stefanaki, Katerina and Ilias, Ioannis and Paschou, Stavroula A and Karagiannakis, Dimitrios S, 2023, Hepatokines: the missing link in the development of insulin resistance and hyperandrogenism in PCOS?: Hormones (Athens, Greece).
DOI: 10.1007/s42000-023-00487-x Source
Abstract
The liver plays a critical role in several metabolic pathways, including the regulation of glucose and lipid metabolism. Non-alcoholic fatty liver disease (NAFLD), the most common chronic liver disease worldwide, is closely associated with insulin resistance (IR) and metabolic syndrome (MetS). Hepatokines, newly discovered proteins secreted by hepatocytes, have been linked to the induction of these metabolic dysregulations. Polycystic ovary syndrome (PCOS), the most common endocrine disorder in women of reproductive age, has been associated with NAFLD and IR, while hyperandrogenism additionally appears to be implicated in the pathogenesis of the latter. However, the potential role of hepatokines in the development of metabolic disorders in PCOS has not been fully investigated. Therefore, the aim of this review is to critically appraise the current evidence regarding the interplay of hepatokines with NAFLD, hyperandrogenism, and IR in PCOS.
BibTeX
@article{doi101007s4200002300487x,
  author     = {Stefanaki, Katerina and Ilias, Ioannis and Paschou, Stavroula A and Karagiannakis, Dimitrios S},
  title      = {Hepatokines: the missing link in the development of insulin resistance and hyperandrogenism in {PCOS}?},
  journal    = {Hormones (Athens, Greece)},
  year       = {2023},
  doi        = {10.1007/s42000-023-00487-x},
  url        = {https://pmc.ncbi.nlm.nih.gov/articles/6112576/},
  openalex   = {W4386697076},
  pmid       = {37704921},
  pmcid      = {6112576},
  references = {doi101016jfertnstert200310004, doi101016jfertnstert200806035, doi101056nejmra041536, doi101093humrepdeh098, doi101111cen13795, doi101159000375530, doi101172jci23606, doi101210edrv1860318, doi101210er20111034, doi101210jc20060178},
  abstract   = {The liver plays a critical role in several metabolic pathways, including the regulation of glucose and lipid metabolism. Non-alcoholic fatty liver disease (NAFLD), the most common chronic liver disease worldwide, is closely associated with insulin resistance (IR) and metabolic syndrome (MetS). Hepatokines, newly discovered proteins secreted by hepatocytes, have been linked to the induction of these metabolic dysregulations. Polycystic ovary syndrome (PCOS), the most common endocrine disorder in women of reproductive age, has been associated with NAFLD and IR, while hyperandrogenism additionally appears to be implicated in the pathogenesis of the latter. However, the potential role of hepatokines in the development of metabolic disorders in PCOS has not been fully investigated. Therefore, the aim of this review is to critically appraise the current evidence regarding the interplay of hepatokines with NAFLD, hyperandrogenism, and IR in PCOS.}
}