% Smirnov, Sivak, Kozmina (2016): reliability of VKontakte (VK) profile data
% for educational research. Entry cleaned up from a publisher auto-export:
%   - the author biographies and postal address that the export had fused onto
%     the front of the abstract now live in annote; abstract holds only the
%     abstract text (the export had duplicated the same blob in both fields);
%   - the empty publisher field was dropped;
%   - keywords lost an empty leading item and a fused "achievementschool" token;
%   - percent signs are escaped and quotes are ASCII so the text is safe to
%     typeset under classic BibTeX as well as biblatex.
% NOTE(review): the export carried no volume or DOI; add them once verified
% against the journal page, and add an access date for the URL if the target
% style supports one.
@article{26543118_199015452_2016,
  author   = {Smirnov, Ivan and Sivak, Elizaveta and Kozmina, Yana},
  title    = {In Search of Lost Profiles: The Reliability of {VKontakte} Data
              and Its Importance for Educational Research},
  journal  = {Educational Studies Moscow},
  year     = {2016},
  number   = {4},
  pages    = {106--122},
  url      = {https://vo.hse.ru/en/2016--4/199015452.html},
  keywords = {social network analysis, social network sites, VK,
              data reliability, friendship networks, academic achievement,
              school},
  abstract = {The potential of VKontakte as a data source is now acknowledged
              in educational research, but little is known about the
              reliability of data obtained from this social network and about
              its sampling bias. Our article investigates the reliability of
              VK data, using the examples of a secondary school (766 students)
              and a university (15,757 students). We describe the procedure of
              matching VK profiles to real students. A direct comparison
              permitted us to identify profiles of around 18\% of students. A
              special technique introduced in the article increased this
              number up to 88\% for school students and up to 93\% for
              university students. We compare age, gender and GPA of
              identified students and those whom we did not find on VK. We
              also compare the structure of social relationships, retrieved
              from VK data, to the expected structure of students' social
              ties. We found that the structure of `virtual' social
              relationships reproduces both the socio-demographic division of
              students into classes or majors and the spatial division into
              different school buildings or university campuses. To our
              knowledge, it is the first study of this kind and scale based on
              VK data. It contributes to the understanding of how reliable
              data from this SNS is, how its accuracy can be improved, and how
              it can be used in educational research.},
  annote   = {Ivan Smirnov - Research Assistant, Institute of Education,
              National Research University Higher School of Economics.
              E-mail: ibsmirnov@hse.ru.
              Elizaveta Sivak - Research Fellow, Institute of Education,
              National Research University Higher School of Economics.
              E-mail: esivak@hse.ru.
              Yana Kozmina - Junior Research Fellow, Institute of Education,
              National Research University Higher School of Economics.
              E-mail: ikozmina@hse.ru.
              Address: 20 Myasnitskaya str., 101000 Moscow, Russian
              Federation.},
}