Hadoop基础-Apache Avro的串行化与反串行化
作者:尹正杰
版权声明:原创作品,谢绝转载!否则将追究法律责任。
一.Apache Avro简介
1>.Apache Avro的来源
Apache Avro 是一个语言中立的数据序列化系统,它是由Hadoop之父Doug Cutting开发的。因为Hadoop的Writable串行化机制只支持Java语言,即不能跨语言,所以Doug Cutting开发了Avro。Avro的数据格式独立于具体的编程语言,也就是说它是跨语言的。
2>.Avro特点
Apache Avro™是一个数据序列化系统。它有以下特点:
第一:丰富的数据结构。
第二:紧凑,快速的二进制数据格式。
第三:一个容器文件,用于存储持久性数据。
第四:远程过程调用(RPC)。
第五:与动态语言的简单集成。读取或写入数据文件不需要代码生成,也不需要使用或实现RPC协议。代码生成是一种可选的优化,只有静态类型语言才值得实现。
想要了解更多Avro,可以参考Apache官网(https://avro.apache.org/),我就懒得搬运了,直接上干货,本篇博客介绍了两种Avro序列化与反序列化的方式。
3>.安装Avro
下载Avro:
二.Avro环境准备
其实部署Avro的流程大致可以用下图来表示,配置起来相对来说还是蛮简单的,具体操作如下:
1>.创建emp.avsc文件,内容如下
{
  "namespace": "tutorialspoint.com",
  "type": "record",
  "name": "Emp",
  "fields": [
    {"name": "name", "type": "string"},
    {"name": "id", "type": "int"},
    {"name": "salary", "type": "int"},
    {"name": "age", "type": "int"},
    {"name": "address", "type": "string"}
  ]
}
2>.将下载的avro-1.8.2.jar和avro-tools-1.8.2.jar文件放在emp.avsc同级目录
3>.编译schema文件
4>.查看文件内容
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoderENCODER = 23 new BinaryMessageEncoder (MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder DECODER = 26 new BinaryMessageDecoder (MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified { @link SchemaStore}. 37 * @param resolver a { @link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder (MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public java.lang.CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public java.lang.CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use newBuilder()
. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(java.lang.CharSequence name, java.lang.Integer id, java.lang.Integer salary, java.lang.Integer age, java.lang.CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public java.lang.Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, java.lang.Object value$) { 99 switch (field$) {100 case 0: name = (java.lang.CharSequence)value$; break;101 case 1: id = (java.lang.Integer)value$; break;102 case 2: salary = (java.lang.Integer)value$; break;103 case 3: age = (java.lang.Integer)value$; break;104 case 4: address = (java.lang.CharSequence)value$; break;105 default: throw new org.apache.avro.AvroRuntimeException("Bad index");106 }107 }108 109 /**110 * Gets the value of the 'name' field.111 * @return The value of the 'name' field.112 */113 public java.lang.CharSequence getName() {114 return name;115 }116 117 /**118 * Sets the value of the 'name' field.119 * @param value the value to set.120 */121 public void setName(java.lang.CharSequence value) {122 this.name = value;123 }124 125 /**126 * Gets the value of the 'id' field.127 * @return The value of the 'id' field.128 */129 public java.lang.Integer getId() {130 return id;131 }132 133 /**134 * Sets the value of 
the 'id' field.135 * @param value the value to set.136 */137 public void setId(java.lang.Integer value) {138 this.id = value;139 }140 141 /**142 * Gets the value of the 'salary' field.143 * @return The value of the 'salary' field.144 */145 public java.lang.Integer getSalary() {146 return salary;147 }148 149 /**150 * Sets the value of the 'salary' field.151 * @param value the value to set.152 */153 public void setSalary(java.lang.Integer value) {154 this.salary = value;155 }156 157 /**158 * Gets the value of the 'age' field.159 * @return The value of the 'age' field.160 */161 public java.lang.Integer getAge() {162 return age;163 }164 165 /**166 * Sets the value of the 'age' field.167 * @param value the value to set.168 */169 public void setAge(java.lang.Integer value) {170 this.age = value;171 }172 173 /**174 * Gets the value of the 'address' field.175 * @return The value of the 'address' field.176 */177 public java.lang.CharSequence getAddress() {178 return address;179 }180 181 /**182 * Sets the value of the 'address' field.183 * @param value the value to set.184 */185 public void setAddress(java.lang.CharSequence value) {186 this.address = value;187 }188 189 /**190 * Creates a new Emp RecordBuilder.191 * @return A new Emp RecordBuilder192 */193 public static tutorialspoint.com.Emp.Builder newBuilder() {194 return new tutorialspoint.com.Emp.Builder();195 }196 197 /**198 * Creates a new Emp RecordBuilder by copying an existing Builder.199 * @param other The existing builder to copy.200 * @return A new Emp RecordBuilder201 */202 public static tutorialspoint.com.Emp.Builder newBuilder(tutorialspoint.com.Emp.Builder other) {203 return new tutorialspoint.com.Emp.Builder(other);204 }205 206 /**207 * Creates a new Emp RecordBuilder by copying an existing Emp instance.208 * @param other The existing instance to copy.209 * @return A new Emp RecordBuilder210 */211 public static tutorialspoint.com.Emp.Builder newBuilder(tutorialspoint.com.Emp other) {212 return new 
tutorialspoint.com.Emp.Builder(other);213 }214 215 /**216 * RecordBuilder for Emp instances.217 */218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase219 implements org.apache.avro.data.RecordBuilder {220 221 private java.lang.CharSequence name;222 private int id;223 private int salary;224 private int age;225 private java.lang.CharSequence address;226 227 /** Creates a new Builder */228 private Builder() {229 super(SCHEMA$);230 }231 232 /**233 * Creates a Builder by copying an existing Builder.234 * @param other The existing Builder to copy.235 */236 private Builder(tutorialspoint.com.Emp.Builder other) {237 super(other);238 if (isValidValue(fields()[0], other.name)) {239 this.name = data().deepCopy(fields()[0].schema(), other.name);240 fieldSetFlags()[0] = true;241 }242 if (isValidValue(fields()[1], other.id)) {243 this.id = data().deepCopy(fields()[1].schema(), other.id);244 fieldSetFlags()[1] = true;245 }246 if (isValidValue(fields()[2], other.salary)) {247 this.salary = data().deepCopy(fields()[2].schema(), other.salary);248 fieldSetFlags()[2] = true;249 }250 if (isValidValue(fields()[3], other.age)) {251 this.age = data().deepCopy(fields()[3].schema(), other.age);252 fieldSetFlags()[3] = true;253 }254 if (isValidValue(fields()[4], other.address)) {255 this.address = data().deepCopy(fields()[4].schema(), other.address);256 fieldSetFlags()[4] = true;257 }258 }259 260 /**261 * Creates a Builder by copying an existing Emp instance262 * @param other The existing instance to copy.263 */264 private Builder(tutorialspoint.com.Emp other) {265 super(SCHEMA$);266 if (isValidValue(fields()[0], other.name)) {267 this.name = data().deepCopy(fields()[0].schema(), other.name);268 fieldSetFlags()[0] = true;269 }270 if (isValidValue(fields()[1], other.id)) {271 this.id = data().deepCopy(fields()[1].schema(), other.id);272 fieldSetFlags()[1] = true;273 }274 if (isValidValue(fields()[2], other.salary)) {275 this.salary = 
data().deepCopy(fields()[2].schema(), other.salary);276 fieldSetFlags()[2] = true;277 }278 if (isValidValue(fields()[3], other.age)) {279 this.age = data().deepCopy(fields()[3].schema(), other.age);280 fieldSetFlags()[3] = true;281 }282 if (isValidValue(fields()[4], other.address)) {283 this.address = data().deepCopy(fields()[4].schema(), other.address);284 fieldSetFlags()[4] = true;285 }286 }287 288 /**289 * Gets the value of the 'name' field.290 * @return The value.291 */292 public java.lang.CharSequence getName() {293 return name;294 }295 296 /**297 * Sets the value of the 'name' field.298 * @param value The value of 'name'.299 * @return This builder.300 */301 public tutorialspoint.com.Emp.Builder setName(java.lang.CharSequence value) {302 validate(fields()[0], value);303 this.name = value;304 fieldSetFlags()[0] = true;305 return this;306 }307 308 /**309 * Checks whether the 'name' field has been set.310 * @return True if the 'name' field has been set, false otherwise.311 */312 public boolean hasName() {313 return fieldSetFlags()[0];314 }315 316 317 /**318 * Clears the value of the 'name' field.319 * @return This builder.320 */321 public tutorialspoint.com.Emp.Builder clearName() {322 name = null;323 fieldSetFlags()[0] = false;324 return this;325 }326 327 /**328 * Gets the value of the 'id' field.329 * @return The value.330 */331 public java.lang.Integer getId() {332 return id;333 }334 335 /**336 * Sets the value of the 'id' field.337 * @param value The value of 'id'.338 * @return This builder.339 */340 public tutorialspoint.com.Emp.Builder setId(int value) {341 validate(fields()[1], value);342 this.id = value;343 fieldSetFlags()[1] = true;344 return this;345 }346 347 /**348 * Checks whether the 'id' field has been set.349 * @return True if the 'id' field has been set, false otherwise.350 */351 public boolean hasId() {352 return fieldSetFlags()[1];353 }354 355 356 /**357 * Clears the value of the 'id' field.358 * @return This builder.359 */360 public 
tutorialspoint.com.Emp.Builder clearId() {361 fieldSetFlags()[1] = false;362 return this;363 }364 365 /**366 * Gets the value of the 'salary' field.367 * @return The value.368 */369 public java.lang.Integer getSalary() {370 return salary;371 }372 373 /**374 * Sets the value of the 'salary' field.375 * @param value The value of 'salary'.376 * @return This builder.377 */378 public tutorialspoint.com.Emp.Builder setSalary(int value) {379 validate(fields()[2], value);380 this.salary = value;381 fieldSetFlags()[2] = true;382 return this;383 }384 385 /**386 * Checks whether the 'salary' field has been set.387 * @return True if the 'salary' field has been set, false otherwise.388 */389 public boolean hasSalary() {390 return fieldSetFlags()[2];391 }392 393 394 /**395 * Clears the value of the 'salary' field.396 * @return This builder.397 */398 public tutorialspoint.com.Emp.Builder clearSalary() {399 fieldSetFlags()[2] = false;400 return this;401 }402 403 /**404 * Gets the value of the 'age' field.405 * @return The value.406 */407 public java.lang.Integer getAge() {408 return age;409 }410 411 /**412 * Sets the value of the 'age' field.413 * @param value The value of 'age'.414 * @return This builder.415 */416 public tutorialspoint.com.Emp.Builder setAge(int value) {417 validate(fields()[3], value);418 this.age = value;419 fieldSetFlags()[3] = true;420 return this;421 }422 423 /**424 * Checks whether the 'age' field has been set.425 * @return True if the 'age' field has been set, false otherwise.426 */427 public boolean hasAge() {428 return fieldSetFlags()[3];429 }430 431 432 /**433 * Clears the value of the 'age' field.434 * @return This builder.435 */436 public tutorialspoint.com.Emp.Builder clearAge() {437 fieldSetFlags()[3] = false;438 return this;439 }440 441 /**442 * Gets the value of the 'address' field.443 * @return The value.444 */445 public java.lang.CharSequence getAddress() {446 return address;447 }448 449 /**450 * Sets the value of the 'address' field.451 * 
@param value The value of 'address'.452 * @return This builder.453 */454 public tutorialspoint.com.Emp.Builder setAddress(java.lang.CharSequence value) {455 validate(fields()[4], value);456 this.address = value;457 fieldSetFlags()[4] = true;458 return this;459 }460 461 /**462 * Checks whether the 'address' field has been set.463 * @return True if the 'address' field has been set, false otherwise.464 */465 public boolean hasAddress() {466 return fieldSetFlags()[4];467 }468 469 470 /**471 * Clears the value of the 'address' field.472 * @return This builder.473 */474 public tutorialspoint.com.Emp.Builder clearAddress() {475 address = null;476 fieldSetFlags()[4] = false;477 return this;478 }479 480 @Override481 @SuppressWarnings("unchecked")482 public Emp build() {483 try {484 Emp record = new Emp();485 record.name = fieldSetFlags()[0] ? this.name : (java.lang.CharSequence) defaultValue(fields()[0]);486 record.id = fieldSetFlags()[1] ? this.id : (java.lang.Integer) defaultValue(fields()[1]);487 record.salary = fieldSetFlags()[2] ? this.salary : (java.lang.Integer) defaultValue(fields()[2]);488 record.age = fieldSetFlags()[3] ? this.age : (java.lang.Integer) defaultValue(fields()[3]);489 record.address = fieldSetFlags()[4] ? 
this.address : (java.lang.CharSequence) defaultValue(fields()[4]);490 return record;491 } catch (java.lang.Exception e) {492 throw new org.apache.avro.AvroRuntimeException(e);493 }494 }495 }496 497 @SuppressWarnings("unchecked")498 private static final org.apache.avro.io.DatumWriter 499 WRITER$ = (org.apache.avro.io.DatumWriter )MODEL$.createDatumWriter(SCHEMA$);500 501 @Override public void writeExternal(java.io.ObjectOutput out)502 throws java.io.IOException {503 WRITER$.write(this, SpecificData.getEncoder(out));504 }505 506 @SuppressWarnings("unchecked")507 private static final org.apache.avro.io.DatumReader 508 READER$ = (org.apache.avro.io.DatumReader )MODEL$.createDatumReader(SCHEMA$);509 510 @Override public void readExternal(java.io.ObjectInput in)511 throws java.io.IOException {512 READER$.read(this, SpecificData.getDecoder(in));513 }514 515 }
5>.将此文件加载到idea
第一步:编辑pom.xml配置文件
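<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.yinzhengjie</groupId>
    <artifactId>myAvro-pb</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro</artifactId>
            <version>1.8.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.avro</groupId>
            <artifactId>avro-tools</artifactId>
            <version>1.8.2</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
    </dependencies>
</project>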
第二步:新建包,名称tutorialspoint.com,并将Emp.java文件复制到包内,如有错误不建议直接修改。
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoderENCODER = 23 new BinaryMessageEncoder (MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder DECODER = 26 new BinaryMessageDecoder (MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified { @link SchemaStore}. 37 * @param resolver a { @link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder (MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use newBuilder()
. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, Object value$) { 99 switch (field$) {100 case 0: name = (CharSequence)value$; break;101 case 1: id = (Integer)value$; break;102 case 2: salary = (Integer)value$; break;103 case 3: age = (Integer)value$; break;104 case 4: address = (CharSequence)value$; break;105 default: throw new org.apache.avro.AvroRuntimeException("Bad index");106 }107 }108 109 /**110 * Gets the value of the 'name' field.111 * @return The value of the 'name' field.112 */113 public CharSequence getName() {114 return name;115 }116 117 /**118 * Sets the value of the 'name' field.119 * @param value the value to set.120 */121 public void setName(CharSequence value) {122 this.name = value;123 }124 125 /**126 * Gets the value of the 'id' field.127 * @return The value of the 'id' field.128 */129 public Integer getId() {130 return id;131 }132 133 /**134 * Sets the value of the 'id' field.135 * @param value the value to set.136 */137 public void setId(Integer value) {138 this.id = value;139 }140 141 /**142 * Gets the 
value of the 'salary' field.143 * @return The value of the 'salary' field.144 */145 public Integer getSalary() {146 return salary;147 }148 149 /**150 * Sets the value of the 'salary' field.151 * @param value the value to set.152 */153 public void setSalary(Integer value) {154 this.salary = value;155 }156 157 /**158 * Gets the value of the 'age' field.159 * @return The value of the 'age' field.160 */161 public Integer getAge() {162 return age;163 }164 165 /**166 * Sets the value of the 'age' field.167 * @param value the value to set.168 */169 public void setAge(Integer value) {170 this.age = value;171 }172 173 /**174 * Gets the value of the 'address' field.175 * @return The value of the 'address' field.176 */177 public CharSequence getAddress() {178 return address;179 }180 181 /**182 * Sets the value of the 'address' field.183 * @param value the value to set.184 */185 public void setAddress(CharSequence value) {186 this.address = value;187 }188 189 /**190 * Creates a new Emp RecordBuilder.191 * @return A new Emp RecordBuilder192 */193 public static Builder newBuilder() {194 return new Builder();195 }196 197 /**198 * Creates a new Emp RecordBuilder by copying an existing Builder.199 * @param other The existing builder to copy.200 * @return A new Emp RecordBuilder201 */202 public static Builder newBuilder(Builder other) {203 return new Builder(other);204 }205 206 /**207 * Creates a new Emp RecordBuilder by copying an existing Emp instance.208 * @param other The existing instance to copy.209 * @return A new Emp RecordBuilder210 */211 public static Builder newBuilder(Emp other) {212 return new Builder(other);213 }214 215 /**216 * RecordBuilder for Emp instances.217 */218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase219 implements org.apache.avro.data.RecordBuilder {220 221 private CharSequence name;222 private int id;223 private int salary;224 private int age;225 private CharSequence address;226 227 /** Creates a new Builder 
*/228 private Builder() {229 super(SCHEMA$);230 }231 232 /**233 * Creates a Builder by copying an existing Builder.234 * @param other The existing Builder to copy.235 */236 private Builder(Builder other) {237 super(other);238 if (isValidValue(fields()[0], other.name)) {239 this.name = data().deepCopy(fields()[0].schema(), other.name);240 fieldSetFlags()[0] = true;241 }242 if (isValidValue(fields()[1], other.id)) {243 this.id = data().deepCopy(fields()[1].schema(), other.id);244 fieldSetFlags()[1] = true;245 }246 if (isValidValue(fields()[2], other.salary)) {247 this.salary = data().deepCopy(fields()[2].schema(), other.salary);248 fieldSetFlags()[2] = true;249 }250 if (isValidValue(fields()[3], other.age)) {251 this.age = data().deepCopy(fields()[3].schema(), other.age);252 fieldSetFlags()[3] = true;253 }254 if (isValidValue(fields()[4], other.address)) {255 this.address = data().deepCopy(fields()[4].schema(), other.address);256 fieldSetFlags()[4] = true;257 }258 }259 260 /**261 * Creates a Builder by copying an existing Emp instance262 * @param other The existing instance to copy.263 */264 private Builder(Emp other) {265 super(SCHEMA$);266 if (isValidValue(fields()[0], other.name)) {267 this.name = data().deepCopy(fields()[0].schema(), other.name);268 fieldSetFlags()[0] = true;269 }270 if (isValidValue(fields()[1], other.id)) {271 this.id = data().deepCopy(fields()[1].schema(), other.id);272 fieldSetFlags()[1] = true;273 }274 if (isValidValue(fields()[2], other.salary)) {275 this.salary = data().deepCopy(fields()[2].schema(), other.salary);276 fieldSetFlags()[2] = true;277 }278 if (isValidValue(fields()[3], other.age)) {279 this.age = data().deepCopy(fields()[3].schema(), other.age);280 fieldSetFlags()[3] = true;281 }282 if (isValidValue(fields()[4], other.address)) {283 this.address = data().deepCopy(fields()[4].schema(), other.address);284 fieldSetFlags()[4] = true;285 }286 }287 288 /**289 * Gets the value of the 'name' field.290 * @return The value.291 */292 
public CharSequence getName() {293 return name;294 }295 296 /**297 * Sets the value of the 'name' field.298 * @param value The value of 'name'.299 * @return This builder.300 */301 public Builder setName(CharSequence value) {302 validate(fields()[0], value);303 this.name = value;304 fieldSetFlags()[0] = true;305 return this;306 }307 308 /**309 * Checks whether the 'name' field has been set.310 * @return True if the 'name' field has been set, false otherwise.311 */312 public boolean hasName() {313 return fieldSetFlags()[0];314 }315 316 317 /**318 * Clears the value of the 'name' field.319 * @return This builder.320 */321 public Builder clearName() {322 name = null;323 fieldSetFlags()[0] = false;324 return this;325 }326 327 /**328 * Gets the value of the 'id' field.329 * @return The value.330 */331 public Integer getId() {332 return id;333 }334 335 /**336 * Sets the value of the 'id' field.337 * @param value The value of 'id'.338 * @return This builder.339 */340 public Builder setId(int value) {341 validate(fields()[1], value);342 this.id = value;343 fieldSetFlags()[1] = true;344 return this;345 }346 347 /**348 * Checks whether the 'id' field has been set.349 * @return True if the 'id' field has been set, false otherwise.350 */351 public boolean hasId() {352 return fieldSetFlags()[1];353 }354 355 356 /**357 * Clears the value of the 'id' field.358 * @return This builder.359 */360 public Builder clearId() {361 fieldSetFlags()[1] = false;362 return this;363 }364 365 /**366 * Gets the value of the 'salary' field.367 * @return The value.368 */369 public Integer getSalary() {370 return salary;371 }372 373 /**374 * Sets the value of the 'salary' field.375 * @param value The value of 'salary'.376 * @return This builder.377 */378 public Builder setSalary(int value) {379 validate(fields()[2], value);380 this.salary = value;381 fieldSetFlags()[2] = true;382 return this;383 }384 385 /**386 * Checks whether the 'salary' field has been set.387 * @return True if the 'salary' field 
has been set, false otherwise.388 */389 public boolean hasSalary() {390 return fieldSetFlags()[2];391 }392 393 394 /**395 * Clears the value of the 'salary' field.396 * @return This builder.397 */398 public Builder clearSalary() {399 fieldSetFlags()[2] = false;400 return this;401 }402 403 /**404 * Gets the value of the 'age' field.405 * @return The value.406 */407 public Integer getAge() {408 return age;409 }410 411 /**412 * Sets the value of the 'age' field.413 * @param value The value of 'age'.414 * @return This builder.415 */416 public Builder setAge(int value) {417 validate(fields()[3], value);418 this.age = value;419 fieldSetFlags()[3] = true;420 return this;421 }422 423 /**424 * Checks whether the 'age' field has been set.425 * @return True if the 'age' field has been set, false otherwise.426 */427 public boolean hasAge() {428 return fieldSetFlags()[3];429 }430 431 432 /**433 * Clears the value of the 'age' field.434 * @return This builder.435 */436 public Builder clearAge() {437 fieldSetFlags()[3] = false;438 return this;439 }440 441 /**442 * Gets the value of the 'address' field.443 * @return The value.444 */445 public CharSequence getAddress() {446 return address;447 }448 449 /**450 * Sets the value of the 'address' field.451 * @param value The value of 'address'.452 * @return This builder.453 */454 public Builder setAddress(CharSequence value) {455 validate(fields()[4], value);456 this.address = value;457 fieldSetFlags()[4] = true;458 return this;459 }460 461 /**462 * Checks whether the 'address' field has been set.463 * @return True if the 'address' field has been set, false otherwise.464 */465 public boolean hasAddress() {466 return fieldSetFlags()[4];467 }468 469 470 /**471 * Clears the value of the 'address' field.472 * @return This builder.473 */474 public Builder clearAddress() {475 address = null;476 fieldSetFlags()[4] = false;477 return this;478 }479 480 481 @SuppressWarnings("unchecked")482 public Emp build() {483 try {484 Emp record = new 
Emp();485 record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]);486 record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]);487 record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]);488 record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]);489 record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]);490 return record;491 } catch (Exception e) {492 throw new org.apache.avro.AvroRuntimeException(e);493 }494 }495 }496 497 @SuppressWarnings("unchecked")498 private static final org.apache.avro.io.DatumWriter 499 WRITER$ = (org.apache.avro.io.DatumWriter )MODEL$.createDatumWriter(SCHEMA$);500 501 @Override public void writeExternal(java.io.ObjectOutput out)502 throws java.io.IOException {503 WRITER$.write(this, SpecificData.getEncoder(out));504 }505 506 @SuppressWarnings("unchecked")507 private static final org.apache.avro.io.DatumReader 508 READER$ = (org.apache.avro.io.DatumReader )MODEL$.createDatumReader(SCHEMA$);509 510 @Override public void readExternal(java.io.ObjectInput in)511 throws java.io.IOException {512 READER$.read(this, SpecificData.getDecoder(in));513 }514 515 }
三.进行Avro编程
1>.开始编写串行化代码
/*@author :yinzhengjieBlog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/EMAIL:y1053419035@qq.com*/package cn.org.yinzhengjie.avro;import org.apache.avro.file.DataFileWriter;import org.apache.avro.io.DatumWriter;import org.apache.avro.specific.SpecificDatumWriter;import org.junit.Test;import tutorialspoint.com.Emp;import java.io.File;import java.io.IOException;public class TestAvro { /** * 测试Avro序列化 */ @Test public void testAvroSerial() throws IOException { //定义对象,并给对象赋初值 Emp yinzhengjie = new Emp(); yinzhengjie.setId(1); yinzhengjie.setName("尹正杰"); yinzhengjie.setAge(18); yinzhengjie.setSalary(80000); yinzhengjie.setAddress("北京"); //初始化writer对象,我习惯称它为写入器 DatumWriterdw = new SpecificDatumWriter (Emp.class); //初始化文件写入器 DataFileWriter dfw = new DataFileWriter (dw); //开始序列化文件,这个 Emp.SCHEMA$ 其实是一个常量,是咱们编译时自动生成的一个json格式的字符串。 dfw.create(Emp.SCHEMA$,new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avro")); //在序列化文件中追加对象 dfw.append(yinzhengjie); //释放资源 dfw.close(); System.out.println("序列化成功!"); }}
2>.测试java、hadoop、avro对10000000个对象串行化速度比对,大小比对
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoderENCODER = 23 new BinaryMessageEncoder (MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder DECODER = 26 new BinaryMessageDecoder (MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified { @link SchemaStore}. 37 * @param resolver a { @link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder (MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use newBuilder()
. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, Object value$) { 99 switch (field$) {100 case 0: name = (CharSequence)value$; break;101 case 1: id = (Integer)value$; break;102 case 2: salary = (Integer)value$; break;103 case 3: age = (Integer)value$; break;104 case 4: address = (CharSequence)value$; break;105 default: throw new org.apache.avro.AvroRuntimeException("Bad index");106 }107 }108 109 /**110 * Gets the value of the 'name' field.111 * @return The value of the 'name' field.112 */113 public CharSequence getName() {114 return name;115 }116 117 /**118 * Sets the value of the 'name' field.119 * @param value the value to set.120 */121 public void setName(CharSequence value) {122 this.name = value;123 }124 125 /**126 * Gets the value of the 'id' field.127 * @return The value of the 'id' field.128 */129 public Integer getId() {130 return id;131 }132 133 /**134 * Sets the value of the 'id' field.135 * @param value the value to set.136 */137 public void setId(Integer value) {138 this.id = value;139 }140 141 /**142 * Gets the 
value of the 'salary' field.143 * @return The value of the 'salary' field.144 */145 public Integer getSalary() {146 return salary;147 }148 149 /**150 * Sets the value of the 'salary' field.151 * @param value the value to set.152 */153 public void setSalary(Integer value) {154 this.salary = value;155 }156 157 /**158 * Gets the value of the 'age' field.159 * @return The value of the 'age' field.160 */161 public Integer getAge() {162 return age;163 }164 165 /**166 * Sets the value of the 'age' field.167 * @param value the value to set.168 */169 public void setAge(Integer value) {170 this.age = value;171 }172 173 /**174 * Gets the value of the 'address' field.175 * @return The value of the 'address' field.176 */177 public CharSequence getAddress() {178 return address;179 }180 181 /**182 * Sets the value of the 'address' field.183 * @param value the value to set.184 */185 public void setAddress(CharSequence value) {186 this.address = value;187 }188 189 /**190 * Creates a new Emp RecordBuilder.191 * @return A new Emp RecordBuilder192 */193 public static Builder newBuilder() {194 return new Builder();195 }196 197 /**198 * Creates a new Emp RecordBuilder by copying an existing Builder.199 * @param other The existing builder to copy.200 * @return A new Emp RecordBuilder201 */202 public static Builder newBuilder(Builder other) {203 return new Builder(other);204 }205 206 /**207 * Creates a new Emp RecordBuilder by copying an existing Emp instance.208 * @param other The existing instance to copy.209 * @return A new Emp RecordBuilder210 */211 public static Builder newBuilder(Emp other) {212 return new Builder(other);213 }214 215 /**216 * RecordBuilder for Emp instances.217 */218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase219 implements org.apache.avro.data.RecordBuilder {220 221 private CharSequence name;222 private int id;223 private int salary;224 private int age;225 private CharSequence address;226 227 /** Creates a new Builder 
*/228 private Builder() {229 super(SCHEMA$);230 }231 232 /**233 * Creates a Builder by copying an existing Builder.234 * @param other The existing Builder to copy.235 */236 private Builder(Builder other) {237 super(other);238 if (isValidValue(fields()[0], other.name)) {239 this.name = data().deepCopy(fields()[0].schema(), other.name);240 fieldSetFlags()[0] = true;241 }242 if (isValidValue(fields()[1], other.id)) {243 this.id = data().deepCopy(fields()[1].schema(), other.id);244 fieldSetFlags()[1] = true;245 }246 if (isValidValue(fields()[2], other.salary)) {247 this.salary = data().deepCopy(fields()[2].schema(), other.salary);248 fieldSetFlags()[2] = true;249 }250 if (isValidValue(fields()[3], other.age)) {251 this.age = data().deepCopy(fields()[3].schema(), other.age);252 fieldSetFlags()[3] = true;253 }254 if (isValidValue(fields()[4], other.address)) {255 this.address = data().deepCopy(fields()[4].schema(), other.address);256 fieldSetFlags()[4] = true;257 }258 }259 260 /**261 * Creates a Builder by copying an existing Emp instance262 * @param other The existing instance to copy.263 */264 private Builder(Emp other) {265 super(SCHEMA$);266 if (isValidValue(fields()[0], other.name)) {267 this.name = data().deepCopy(fields()[0].schema(), other.name);268 fieldSetFlags()[0] = true;269 }270 if (isValidValue(fields()[1], other.id)) {271 this.id = data().deepCopy(fields()[1].schema(), other.id);272 fieldSetFlags()[1] = true;273 }274 if (isValidValue(fields()[2], other.salary)) {275 this.salary = data().deepCopy(fields()[2].schema(), other.salary);276 fieldSetFlags()[2] = true;277 }278 if (isValidValue(fields()[3], other.age)) {279 this.age = data().deepCopy(fields()[3].schema(), other.age);280 fieldSetFlags()[3] = true;281 }282 if (isValidValue(fields()[4], other.address)) {283 this.address = data().deepCopy(fields()[4].schema(), other.address);284 fieldSetFlags()[4] = true;285 }286 }287 288 /**289 * Gets the value of the 'name' field.290 * @return The value.291 */292 
public CharSequence getName() {293 return name;294 }295 296 /**297 * Sets the value of the 'name' field.298 * @param value The value of 'name'.299 * @return This builder.300 */301 public Builder setName(CharSequence value) {302 validate(fields()[0], value);303 this.name = value;304 fieldSetFlags()[0] = true;305 return this;306 }307 308 /**309 * Checks whether the 'name' field has been set.310 * @return True if the 'name' field has been set, false otherwise.311 */312 public boolean hasName() {313 return fieldSetFlags()[0];314 }315 316 317 /**318 * Clears the value of the 'name' field.319 * @return This builder.320 */321 public Builder clearName() {322 name = null;323 fieldSetFlags()[0] = false;324 return this;325 }326 327 /**328 * Gets the value of the 'id' field.329 * @return The value.330 */331 public Integer getId() {332 return id;333 }334 335 /**336 * Sets the value of the 'id' field.337 * @param value The value of 'id'.338 * @return This builder.339 */340 public Builder setId(int value) {341 validate(fields()[1], value);342 this.id = value;343 fieldSetFlags()[1] = true;344 return this;345 }346 347 /**348 * Checks whether the 'id' field has been set.349 * @return True if the 'id' field has been set, false otherwise.350 */351 public boolean hasId() {352 return fieldSetFlags()[1];353 }354 355 356 /**357 * Clears the value of the 'id' field.358 * @return This builder.359 */360 public Builder clearId() {361 fieldSetFlags()[1] = false;362 return this;363 }364 365 /**366 * Gets the value of the 'salary' field.367 * @return The value.368 */369 public Integer getSalary() {370 return salary;371 }372 373 /**374 * Sets the value of the 'salary' field.375 * @param value The value of 'salary'.376 * @return This builder.377 */378 public Builder setSalary(int value) {379 validate(fields()[2], value);380 this.salary = value;381 fieldSetFlags()[2] = true;382 return this;383 }384 385 /**386 * Checks whether the 'salary' field has been set.387 * @return True if the 'salary' field 
has been set, false otherwise.388 */389 public boolean hasSalary() {390 return fieldSetFlags()[2];391 }392 393 394 /**395 * Clears the value of the 'salary' field.396 * @return This builder.397 */398 public Builder clearSalary() {399 fieldSetFlags()[2] = false;400 return this;401 }402 403 /**404 * Gets the value of the 'age' field.405 * @return The value.406 */407 public Integer getAge() {408 return age;409 }410 411 /**412 * Sets the value of the 'age' field.413 * @param value The value of 'age'.414 * @return This builder.415 */416 public Builder setAge(int value) {417 validate(fields()[3], value);418 this.age = value;419 fieldSetFlags()[3] = true;420 return this;421 }422 423 /**424 * Checks whether the 'age' field has been set.425 * @return True if the 'age' field has been set, false otherwise.426 */427 public boolean hasAge() {428 return fieldSetFlags()[3];429 }430 431 432 /**433 * Clears the value of the 'age' field.434 * @return This builder.435 */436 public Builder clearAge() {437 fieldSetFlags()[3] = false;438 return this;439 }440 441 /**442 * Gets the value of the 'address' field.443 * @return The value.444 */445 public CharSequence getAddress() {446 return address;447 }448 449 /**450 * Sets the value of the 'address' field.451 * @param value The value of 'address'.452 * @return This builder.453 */454 public Builder setAddress(CharSequence value) {455 validate(fields()[4], value);456 this.address = value;457 fieldSetFlags()[4] = true;458 return this;459 }460 461 /**462 * Checks whether the 'address' field has been set.463 * @return True if the 'address' field has been set, false otherwise.464 */465 public boolean hasAddress() {466 return fieldSetFlags()[4];467 }468 469 470 /**471 * Clears the value of the 'address' field.472 * @return This builder.473 */474 public Builder clearAddress() {475 address = null;476 fieldSetFlags()[4] = false;477 return this;478 }479 480 481 @SuppressWarnings("unchecked")482 public Emp build() {483 try {484 Emp record = new 
Emp();485 record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]);486 record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]);487 record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]);488 record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]);489 record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]);490 return record;491 } catch (Exception e) {492 throw new org.apache.avro.AvroRuntimeException(e);493 }494 }495 }496 497 @SuppressWarnings("unchecked")498 private static final org.apache.avro.io.DatumWriter 499 WRITER$ = (org.apache.avro.io.DatumWriter )MODEL$.createDatumWriter(SCHEMA$);500 501 @Override public void writeExternal(java.io.ObjectOutput out)502 throws java.io.IOException {503 WRITER$.write(this, SpecificData.getEncoder(out));504 }505 506 @SuppressWarnings("unchecked")507 private static final org.apache.avro.io.DatumReader 508 READER$ = (org.apache.avro.io.DatumReader )MODEL$.createDatumReader(SCHEMA$);509 510 @Override public void readExternal(java.io.ObjectInput in)511 throws java.io.IOException {512 READER$.read(this, SpecificData.getDecoder(in));513 }514 515 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import java.io.Serializable; 9 10 public class Emp2 implements Serializable {11 private int id;12 private String name;13 private int age;14 private String address;15 private int salary;16 17 public int getId() {18 return id;19 }20 public void setId(int id) {21 this.id = id;22 }23 public String getName() {24 return name;25 }26 public void setName(String name) {27 this.name = name;28 }29 public int getAge() {30 return age;31 }32 public void setAge(int age) {33 this.age = age;34 }35 public String getAddress() {36 return address;37 }38 public void setAddress(String address) {39 this.address = address;40 }41 public int getSalary() {42 return salary;43 }44 public void setSalary(int salary) {45 this.salary = salary;46 }47 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.hadoop.io.Writable; 9 import tutorialspoint.com.Emp;10 11 import java.io.DataInput;12 import java.io.DataOutput;13 import java.io.IOException;14 15 public class EmpWritable implements Writable {16 private Emp emp;17 18 public Emp getEmp() {19 return emp;20 }21 22 public void setEmp(Emp emp) {23 this.emp = emp;24 25 }26 27 28 public EmpWritable() {29 this.emp = emp;30 }31 32 //串行化33 public void write(DataOutput dataOutput) throws IOException {34 dataOutput.writeUTF((String)emp.getName());35 dataOutput.writeInt(emp.getId());36 dataOutput.writeInt(emp.getSalary());37 dataOutput.writeInt(emp.getAge());38 dataOutput.writeUTF((String)emp.getAddress());39 }40 41 //发串行化,我们测试的是串行化,咱们不需要写反串行化的代码,空实现即可。42 public void readFields(DataInput dataInput) {43 }44 }
/*@author :yinzhengjieBlog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/EMAIL:y1053419035@qq.com*/package cn.org.yinzhengjie.avro;import org.apache.avro.file.DataFileWriter;import org.apache.avro.io.DatumWriter;import org.apache.avro.specific.SpecificDatumWriter;import tutorialspoint.com.Emp;import java.io.*;public class TestAvro { private static final File avro = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avroTest"); private static final File java = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.javaTest"); private static final File hadoop = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.hadoopTest"); public static void main(String[] args) throws IOException { testAvroSerial(); testJavaSerial(); testHadoopSerial(); } //测试hadoop的序列化方式 public static void testHadoopSerial() throws IOException { long start = System.currentTimeMillis(); //定义对象,并给对象赋初值 Emp p = new Emp(); p.setId(3); p.setName("jay"); p.setAge(30); p.setSalary(15000); p.setAddress("昌平"); EmpWritable ew = new EmpWritable(); ew.setEmp(p); FileOutputStream fos = new FileOutputStream(hadoop); DataOutputStream dos = new DataOutputStream(fos); for (int i = 0;i <= 10000000;i++){ ew.write(dos); } fos.close(); dos.close(); System.out.printf("这是Hadoop序列化方式: 生成文件大小:[%d],用时:[%d]\n",hadoop.length(),System.currentTimeMillis() - start); } //测试Avro序列化 public static void testAvroSerial() throws IOException { long start = System.currentTimeMillis(); //定义对象,并给对象赋初值 Emp yinzhengjie = new Emp(); yinzhengjie.setId(1); yinzhengjie.setName("尹正杰"); yinzhengjie.setAge(18); yinzhengjie.setSalary(80000); yinzhengjie.setAddress("北京"); //初始化writer对象,我习惯称它为写入器 DatumWriterdw = new SpecificDatumWriter (Emp.class); //初始化文件写入器 DataFileWriter dfw = new DataFileWriter (dw); //开始序列化文件,这个 Emp2.SCHEMA$ 其实是一个常量,是咱们编译时自动生成的一个json格式的字符串。 dfw.create(Emp.SCHEMA$,avro); //在序列化文件中追加对象 for (int i = 0;i <= 10000000;i++){ dfw.append(yinzhengjie); } //释放资源 dfw.close(); 
System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start); } //测试Java序列化 public static void testJavaSerial() throws IOException { long start = System.currentTimeMillis(); Emp2 p = new Emp2(); p.setId(2); p.setName("tom"); p.setAge(20); p.setSalary(13000); p.setAddress("亦庄"); FileOutputStream fos = new FileOutputStream(java); ObjectOutputStream oos = new ObjectOutputStream(fos); for (int i = 0;i <= 10000000;i++){ oos.writeObject(p); } fos.close(); oos.close(); System.out.printf("这是Java序列化方式: 生成文件大小:[%d],用时:[%d]\n",java.length(),(System.currentTimeMillis()-start)); }}/*以上代码执行结果如下:这是Avro序列化方式: 生成文件大小:[220072462],用时:[2570]这是Java序列化方式: 生成文件大小:[50000139],用时:[9876]这是Hadoop序列化方式: 生成文件大小:[250000025],用时:[126290] */
经过测试,序列化同一个对象时,用时最短的是Avro,用时最长的是Hadoop;生成文件最小的是Java,生成文件最大的是Hadoop。
3>.编写反串行化代码
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.avro.file.DataFileReader; 9 import org.apache.avro.file.DataFileWriter;10 import org.apache.avro.io.DatumReader;11 import org.apache.avro.io.DatumWriter;12 import org.apache.avro.specific.SpecificDatumReader;13 import org.apache.avro.specific.SpecificDatumWriter;14 import tutorialspoint.com.Emp;15 16 import java.io.*;17 18 public class TestAvro {19 private static final File avro = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avroTest");20 21 public static void main(String[] args) throws IOException {22 AvroSerial();23 AvroDeserial();24 }25 26 //测试Avro序列化27 public static void AvroSerial() throws IOException {28 long start = System.currentTimeMillis();29 //定义对象,并给对象赋初值30 Emp yinzhengjie = new Emp();31 yinzhengjie.setId(1);32 yinzhengjie.setName("尹正杰");33 yinzhengjie.setAge(18);34 yinzhengjie.setSalary(80000);35 yinzhengjie.setAddress("北京");36 //初始化writer对象,我习惯称它为写入器37 DatumWriterdw = new SpecificDatumWriter (Emp.class);38 //初始化文件写入器39 DataFileWriter dfw = new DataFileWriter (dw);40 //开始序列化文件,这个 Emp2.SCHEMA$ 其实是一个常量,是咱们编译时自动生成的一个json格式的字符串。41 dfw.create(Emp.SCHEMA$,avro);42 //在序列化文件中追加对象43 for (int i = 0;i <= 10000000;i++){44 dfw.append(yinzhengjie);45 }46 //释放资源47 dfw.close();48 System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);49 }50 51 52 //测试Avro反序列化53 public static void AvroDeserial() throws IOException {54 long start = System.currentTimeMillis();55 //初始化reader对象56 DatumReader dr = new SpecificDatumReader (Emp.class);57 //初始化文件阅读器58 DataFileReader dfr = new DataFileReader (avro,dr);59 //遍历阅读器里面的内容60 while (dfr.hasNext()){61 Emp emp = dfr.next();62 Integer id = emp.getId();63 CharSequence name = emp.getName();64 Integer age = emp.getAge();65 Integer salary = emp.getSalary();66 CharSequence 
address = emp.getAddress();67 // System.out.println(id + "|" + name + "|" + age + "|" + salary + "|" + address); 不建议打印,因为1000万条数据估计得30~40秒左右才能输出完成。68 }69 System.out.printf("这是Avro反序列化方式: 生成文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);70 }71 72 }73 74 75 /*76 以上代码执行结果如下:77 这是Avro序列化方式: 生成文件大小:[220072462],用时:[2640]78 这是Avro反序列化方式: 生成文件大小:[220072462],用时:[2866]79 */
4>.测试java、hadoop、avro对10000000个对象反串行化速度比对
1 /** 2 * Autogenerated by Avro 3 * 4 * DO NOT EDIT DIRECTLY 5 */ 6 package tutorialspoint.com; 7 8 import org.apache.avro.specific.SpecificData; 9 import org.apache.avro.message.BinaryMessageEncoder; 10 import org.apache.avro.message.BinaryMessageDecoder; 11 import org.apache.avro.message.SchemaStore; 12 13 @SuppressWarnings("all") 14 @org.apache.avro.specific.AvroGenerated 15 public class Emp extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { 16 private static final long serialVersionUID = 6405205887550658768L; 17 public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"Emp\",\"namespace\":\"tutorialspoint.com\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"id\",\"type\":\"int\"},{\"name\":\"salary\",\"type\":\"int\"},{\"name\":\"age\",\"type\":\"int\"},{\"name\":\"address\",\"type\":\"string\"}]}"); 18 public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } 19 20 private static SpecificData MODEL$ = new SpecificData(); 21 22 private static final BinaryMessageEncoderENCODER = 23 new BinaryMessageEncoder (MODEL$, SCHEMA$); 24 25 private static final BinaryMessageDecoder DECODER = 26 new BinaryMessageDecoder (MODEL$, SCHEMA$); 27 28 /** 29 * Return the BinaryMessageDecoder instance used by this class. 30 */ 31 public static BinaryMessageDecoder getDecoder() { 32 return DECODER; 33 } 34 35 /** 36 * Create a new BinaryMessageDecoder instance for this class that uses the specified { @link SchemaStore}. 37 * @param resolver a { @link SchemaStore} used to find schemas by fingerprint 38 */ 39 public static BinaryMessageDecoder createDecoder(SchemaStore resolver) { 40 return new BinaryMessageDecoder (MODEL$, SCHEMA$, resolver); 41 } 42 43 /** Serializes this Emp to a ByteBuffer. 
*/ 44 public java.nio.ByteBuffer toByteBuffer() throws java.io.IOException { 45 return ENCODER.encode(this); 46 } 47 48 /** Deserializes a Emp from a ByteBuffer. */ 49 public static Emp fromByteBuffer( 50 java.nio.ByteBuffer b) throws java.io.IOException { 51 return DECODER.decode(b); 52 } 53 54 @Deprecated public CharSequence name; 55 @Deprecated public int id; 56 @Deprecated public int salary; 57 @Deprecated public int age; 58 @Deprecated public CharSequence address; 59 60 /** 61 * Default constructor. Note that this does not initialize fields 62 * to their default values from the schema. If that is desired then 63 * one should use newBuilder()
. 64 */ 65 public Emp() {} 66 67 /** 68 * All-args constructor. 69 * @param name The new value for name 70 * @param id The new value for id 71 * @param salary The new value for salary 72 * @param age The new value for age 73 * @param address The new value for address 74 */ 75 public Emp(CharSequence name, Integer id, Integer salary, Integer age, CharSequence address) { 76 this.name = name; 77 this.id = id; 78 this.salary = salary; 79 this.age = age; 80 this.address = address; 81 } 82 83 public org.apache.avro.Schema getSchema() { return SCHEMA$; } 84 // Used by DatumWriter. Applications should not call. 85 public Object get(int field$) { 86 switch (field$) { 87 case 0: return name; 88 case 1: return id; 89 case 2: return salary; 90 case 3: return age; 91 case 4: return address; 92 default: throw new org.apache.avro.AvroRuntimeException("Bad index"); 93 } 94 } 95 96 // Used by DatumReader. Applications should not call. 97 @SuppressWarnings(value="unchecked") 98 public void put(int field$, Object value$) { 99 switch (field$) {100 case 0: name = (CharSequence)value$; break;101 case 1: id = (Integer)value$; break;102 case 2: salary = (Integer)value$; break;103 case 3: age = (Integer)value$; break;104 case 4: address = (CharSequence)value$; break;105 default: throw new org.apache.avro.AvroRuntimeException("Bad index");106 }107 }108 109 /**110 * Gets the value of the 'name' field.111 * @return The value of the 'name' field.112 */113 public CharSequence getName() {114 return name;115 }116 117 /**118 * Sets the value of the 'name' field.119 * @param value the value to set.120 */121 public void setName(CharSequence value) {122 this.name = value;123 }124 125 /**126 * Gets the value of the 'id' field.127 * @return The value of the 'id' field.128 */129 public Integer getId() {130 return id;131 }132 133 /**134 * Sets the value of the 'id' field.135 * @param value the value to set.136 */137 public void setId(Integer value) {138 this.id = value;139 }140 141 /**142 * Gets the 
value of the 'salary' field.143 * @return The value of the 'salary' field.144 */145 public Integer getSalary() {146 return salary;147 }148 149 /**150 * Sets the value of the 'salary' field.151 * @param value the value to set.152 */153 public void setSalary(Integer value) {154 this.salary = value;155 }156 157 /**158 * Gets the value of the 'age' field.159 * @return The value of the 'age' field.160 */161 public Integer getAge() {162 return age;163 }164 165 /**166 * Sets the value of the 'age' field.167 * @param value the value to set.168 */169 public void setAge(Integer value) {170 this.age = value;171 }172 173 /**174 * Gets the value of the 'address' field.175 * @return The value of the 'address' field.176 */177 public CharSequence getAddress() {178 return address;179 }180 181 /**182 * Sets the value of the 'address' field.183 * @param value the value to set.184 */185 public void setAddress(CharSequence value) {186 this.address = value;187 }188 189 /**190 * Creates a new Emp RecordBuilder.191 * @return A new Emp RecordBuilder192 */193 public static Builder newBuilder() {194 return new Builder();195 }196 197 /**198 * Creates a new Emp RecordBuilder by copying an existing Builder.199 * @param other The existing builder to copy.200 * @return A new Emp RecordBuilder201 */202 public static Builder newBuilder(Builder other) {203 return new Builder(other);204 }205 206 /**207 * Creates a new Emp RecordBuilder by copying an existing Emp instance.208 * @param other The existing instance to copy.209 * @return A new Emp RecordBuilder210 */211 public static Builder newBuilder(Emp other) {212 return new Builder(other);213 }214 215 /**216 * RecordBuilder for Emp instances.217 */218 public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase219 implements org.apache.avro.data.RecordBuilder {220 221 private CharSequence name;222 private int id;223 private int salary;224 private int age;225 private CharSequence address;226 227 /** Creates a new Builder 
*/228 private Builder() {229 super(SCHEMA$);230 }231 232 /**233 * Creates a Builder by copying an existing Builder.234 * @param other The existing Builder to copy.235 */236 private Builder(Builder other) {237 super(other);238 if (isValidValue(fields()[0], other.name)) {239 this.name = data().deepCopy(fields()[0].schema(), other.name);240 fieldSetFlags()[0] = true;241 }242 if (isValidValue(fields()[1], other.id)) {243 this.id = data().deepCopy(fields()[1].schema(), other.id);244 fieldSetFlags()[1] = true;245 }246 if (isValidValue(fields()[2], other.salary)) {247 this.salary = data().deepCopy(fields()[2].schema(), other.salary);248 fieldSetFlags()[2] = true;249 }250 if (isValidValue(fields()[3], other.age)) {251 this.age = data().deepCopy(fields()[3].schema(), other.age);252 fieldSetFlags()[3] = true;253 }254 if (isValidValue(fields()[4], other.address)) {255 this.address = data().deepCopy(fields()[4].schema(), other.address);256 fieldSetFlags()[4] = true;257 }258 }259 260 /**261 * Creates a Builder by copying an existing Emp instance262 * @param other The existing instance to copy.263 */264 private Builder(Emp other) {265 super(SCHEMA$);266 if (isValidValue(fields()[0], other.name)) {267 this.name = data().deepCopy(fields()[0].schema(), other.name);268 fieldSetFlags()[0] = true;269 }270 if (isValidValue(fields()[1], other.id)) {271 this.id = data().deepCopy(fields()[1].schema(), other.id);272 fieldSetFlags()[1] = true;273 }274 if (isValidValue(fields()[2], other.salary)) {275 this.salary = data().deepCopy(fields()[2].schema(), other.salary);276 fieldSetFlags()[2] = true;277 }278 if (isValidValue(fields()[3], other.age)) {279 this.age = data().deepCopy(fields()[3].schema(), other.age);280 fieldSetFlags()[3] = true;281 }282 if (isValidValue(fields()[4], other.address)) {283 this.address = data().deepCopy(fields()[4].schema(), other.address);284 fieldSetFlags()[4] = true;285 }286 }287 288 /**289 * Gets the value of the 'name' field.290 * @return The value.291 */292 
public CharSequence getName() {293 return name;294 }295 296 /**297 * Sets the value of the 'name' field.298 * @param value The value of 'name'.299 * @return This builder.300 */301 public Builder setName(CharSequence value) {302 validate(fields()[0], value);303 this.name = value;304 fieldSetFlags()[0] = true;305 return this;306 }307 308 /**309 * Checks whether the 'name' field has been set.310 * @return True if the 'name' field has been set, false otherwise.311 */312 public boolean hasName() {313 return fieldSetFlags()[0];314 }315 316 317 /**318 * Clears the value of the 'name' field.319 * @return This builder.320 */321 public Builder clearName() {322 name = null;323 fieldSetFlags()[0] = false;324 return this;325 }326 327 /**328 * Gets the value of the 'id' field.329 * @return The value.330 */331 public Integer getId() {332 return id;333 }334 335 /**336 * Sets the value of the 'id' field.337 * @param value The value of 'id'.338 * @return This builder.339 */340 public Builder setId(int value) {341 validate(fields()[1], value);342 this.id = value;343 fieldSetFlags()[1] = true;344 return this;345 }346 347 /**348 * Checks whether the 'id' field has been set.349 * @return True if the 'id' field has been set, false otherwise.350 */351 public boolean hasId() {352 return fieldSetFlags()[1];353 }354 355 356 /**357 * Clears the value of the 'id' field.358 * @return This builder.359 */360 public Builder clearId() {361 fieldSetFlags()[1] = false;362 return this;363 }364 365 /**366 * Gets the value of the 'salary' field.367 * @return The value.368 */369 public Integer getSalary() {370 return salary;371 }372 373 /**374 * Sets the value of the 'salary' field.375 * @param value The value of 'salary'.376 * @return This builder.377 */378 public Builder setSalary(int value) {379 validate(fields()[2], value);380 this.salary = value;381 fieldSetFlags()[2] = true;382 return this;383 }384 385 /**386 * Checks whether the 'salary' field has been set.387 * @return True if the 'salary' field 
has been set, false otherwise.388 */389 public boolean hasSalary() {390 return fieldSetFlags()[2];391 }392 393 394 /**395 * Clears the value of the 'salary' field.396 * @return This builder.397 */398 public Builder clearSalary() {399 fieldSetFlags()[2] = false;400 return this;401 }402 403 /**404 * Gets the value of the 'age' field.405 * @return The value.406 */407 public Integer getAge() {408 return age;409 }410 411 /**412 * Sets the value of the 'age' field.413 * @param value The value of 'age'.414 * @return This builder.415 */416 public Builder setAge(int value) {417 validate(fields()[3], value);418 this.age = value;419 fieldSetFlags()[3] = true;420 return this;421 }422 423 /**424 * Checks whether the 'age' field has been set.425 * @return True if the 'age' field has been set, false otherwise.426 */427 public boolean hasAge() {428 return fieldSetFlags()[3];429 }430 431 432 /**433 * Clears the value of the 'age' field.434 * @return This builder.435 */436 public Builder clearAge() {437 fieldSetFlags()[3] = false;438 return this;439 }440 441 /**442 * Gets the value of the 'address' field.443 * @return The value.444 */445 public CharSequence getAddress() {446 return address;447 }448 449 /**450 * Sets the value of the 'address' field.451 * @param value The value of 'address'.452 * @return This builder.453 */454 public Builder setAddress(CharSequence value) {455 validate(fields()[4], value);456 this.address = value;457 fieldSetFlags()[4] = true;458 return this;459 }460 461 /**462 * Checks whether the 'address' field has been set.463 * @return True if the 'address' field has been set, false otherwise.464 */465 public boolean hasAddress() {466 return fieldSetFlags()[4];467 }468 469 470 /**471 * Clears the value of the 'address' field.472 * @return This builder.473 */474 public Builder clearAddress() {475 address = null;476 fieldSetFlags()[4] = false;477 return this;478 }479 480 481 @SuppressWarnings("unchecked")482 public Emp build() {483 try {484 Emp record = new 
Emp();485 record.name = fieldSetFlags()[0] ? this.name : (CharSequence) defaultValue(fields()[0]);486 record.id = fieldSetFlags()[1] ? this.id : (Integer) defaultValue(fields()[1]);487 record.salary = fieldSetFlags()[2] ? this.salary : (Integer) defaultValue(fields()[2]);488 record.age = fieldSetFlags()[3] ? this.age : (Integer) defaultValue(fields()[3]);489 record.address = fieldSetFlags()[4] ? this.address : (CharSequence) defaultValue(fields()[4]);490 return record;491 } catch (Exception e) {492 throw new org.apache.avro.AvroRuntimeException(e);493 }494 }495 }496 497 @SuppressWarnings("unchecked")498 private static final org.apache.avro.io.DatumWriter 499 WRITER$ = (org.apache.avro.io.DatumWriter )MODEL$.createDatumWriter(SCHEMA$);500 501 @Override public void writeExternal(java.io.ObjectOutput out)502 throws java.io.IOException {503 WRITER$.write(this, SpecificData.getEncoder(out));504 }505 506 @SuppressWarnings("unchecked")507 private static final org.apache.avro.io.DatumReader 508 READER$ = (org.apache.avro.io.DatumReader )MODEL$.createDatumReader(SCHEMA$);509 510 @Override public void readExternal(java.io.ObjectInput in)511 throws java.io.IOException {512 READER$.read(this, SpecificData.getDecoder(in));513 }514 515 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import java.io.Serializable; 9 10 public class Emp2 implements Serializable {11 private int id;12 private String name;13 private int age;14 private String address;15 private int salary;16 17 public int getId() {18 return id;19 }20 public void setId(int id) {21 this.id = id;22 }23 public String getName() {24 return name;25 }26 public void setName(String name) {27 this.name = name;28 }29 public int getAge() {30 return age;31 }32 public void setAge(int age) {33 this.age = age;34 }35 public String getAddress() {36 return address;37 }38 public void setAddress(String address) {39 this.address = address;40 }41 public int getSalary() {42 return salary;43 }44 public void setSalary(int salary) {45 this.salary = salary;46 }47 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.hadoop.io.Writable; 9 import tutorialspoint.com.Emp;10 11 import java.io.DataInput;12 import java.io.DataOutput;13 import java.io.IOException;14 15 public class EmpWritable implements Writable {16 private Emp emp = new Emp();17 18 public Emp getEmp() {19 return emp;20 }21 22 public void setEmp(Emp emp) {23 this.emp = emp;24 25 }26 27 28 public EmpWritable() {29 }30 31 //串行化32 public void write(DataOutput dataOutput) throws IOException {33 dataOutput.writeUTF((String)emp.getName());34 dataOutput.writeInt(emp.getId());35 dataOutput.writeInt(emp.getSalary());36 dataOutput.writeInt(emp.getAge());37 dataOutput.writeUTF((String)emp.getAddress());38 }39 40 //发串行化41 public void readFields(DataInput dataInput) throws IOException {42 emp.setName((CharSequence)dataInput.readUTF());43 emp.setId(dataInput.readInt());44 emp.setSalary(dataInput.readInt());45 emp.setAge(dataInput.readInt());46 emp.setAddress((CharSequence)dataInput.readUTF());47 }48 }
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.avro.file.DataFileReader; 9 import org.apache.avro.io.DatumReader;10 import org.apache.avro.specific.SpecificDatumReader;11 import tutorialspoint.com.Emp;12 13 import java.io.*;14 15 public class test {16 private static final File avro = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avroTest");17 private static final File java = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.javaTest");18 private static final File hadoop = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.hadoopTest");19 20 public static void main(String[] args) throws Exception {21 AvroDeserial();22 JavaDeserial();23 HadoopDeserial();24 }25 26 //测试hadoop的反序列化方式27 public static void HadoopDeserial() throws IOException {28 long start = System.currentTimeMillis();29 EmpWritable ew = new EmpWritable();30 FileInputStream fis = new FileInputStream(hadoop);31 DataInputStream dis = new DataInputStream(fis);32 33 for (int i = 0;i <= 10000000;i++){34 ew.readFields(dis);35 // Emp emp = ew.getEmp();36 // System.out.println(emp);37 }38 fis.close();39 dis.close();40 System.out.printf("这是Hadoop反序列化方式: 反序列化文件大小:[%d],用时:[%d]\n",hadoop.length(),System.currentTimeMillis() - start);41 }42 43 44 //测试Avro反序列化45 public static void AvroDeserial() throws IOException {46 long start = System.currentTimeMillis();47 //初始化reader对象48 DatumReaderdr = new SpecificDatumReader (Emp.class);49 //初始化文件阅读器50 DataFileReader dfr = new DataFileReader (avro,dr);51 //遍历阅读器里面的内容52 while (dfr.hasNext()){53 Emp emp = dfr.next();54 Integer id = emp.getId();55 CharSequence name = emp.getName();56 Integer age = emp.getAge();57 Integer salary = emp.getSalary();58 CharSequence address = emp.getAddress();59 // System.out.println(id + "|" + name + "|" + age + "|" + salary + "|" + address); 
不建议打印,因为1000万条数据估计得30~40秒左右才能输出完成。60 }61 System.out.printf("这是Avro反序列化方式: 反序列化文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);62 }63 64 //测试Java反序列化65 public static void JavaDeserial() throws Exception {66 long start = System.currentTimeMillis();67 68 FileInputStream fis = new FileInputStream(java);69 ObjectInputStream ois = new ObjectInputStream(fis);70 for (int i = 0;i <= 10000000;i++){71 Emp2 emp = (Emp2) ois.readObject();72 }73 fis.close();74 ois.close();75 System.out.printf("这是Java反序列化方式: 反序列化文件大小:[%d],用时:[%d]\n",java.length(),(System.currentTimeMillis()-start));76 }77 }78 79 80 /*81 以上代码执行结果如下:82 这是Avro反序列化方式: 反序列化文件大小:[220072462],用时:[3479]83 这是Java反序列化方式: 反序列化文件大小:[50000139],用时:[15677]84 这是Hadoop反序列化方式: 反序列化文件大小:[250000025],用时:[134628]85 */
经测试,将一个对象进行1000万次序列化到一个文件后,再反序列化回来,用时最短的依然是Avro,Java用时次之,Hadoop依然用时最长。
四.avro通过不生成代码,直接使用schema的方式对对象进行串行化
通过上面的编程发现,如果想要通过Avro进行序列化很繁琐:在序列化之前需要准备环境,需要通过编译工具生成代码,然后再导入IDE中进行编程。那有没有不生成代码就可以进行序列化操作的方式呢?答案是肯定的,大致流程如下图:
实现代码如下:
1 /* 2 @author :yinzhengjie 3 Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E8%BF%9B%E9%98%B6%E4%B9%8B%E8%B7%AF/ 4 EMAIL:y1053419035@qq.com 5 */ 6 package cn.org.yinzhengjie.avro; 7 8 import org.apache.avro.Schema; 9 import org.apache.avro.file.DataFileReader;10 import org.apache.avro.file.DataFileWriter;11 import org.apache.avro.generic.GenericData;12 import org.apache.avro.generic.GenericRecord;13 import org.apache.avro.io.DatumReader;14 import org.apache.avro.io.DatumWriter;15 import org.apache.avro.specific.SpecificDatumReader;16 import org.apache.avro.specific.SpecificDatumWriter;17 18 import java.io.File;19 import java.io.IOException;20 21 public class avroSchema {22 private static final File avscFile = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\emp.avsc");23 private static final File avro = new File("D:\\10.Java\\IDE\\yhinzhengjieData\\Avro\\schema.avro");24 public static void main(String[] args) throws IOException {25 AvroSerial();26 AvroDeserial();27 }28 29 //通过Schema的方式序列化30 private static void AvroSerial() throws IOException {31 long start = System.currentTimeMillis();32 //通过文件解析schema,生成schema对象,因此需要传入emp.avsc的路径33 Schema schema = new Schema.Parser().parse(avscFile);34 //将schema变成了类似与map的对象35 GenericRecord yinzhengjie = new GenericData.Record(schema);36 yinzhengjie.put("id",1);37 yinzhengjie.put("name","尹正杰");38 yinzhengjie.put("age",18);39 yinzhengjie.put("salary",80000);40 yinzhengjie.put("address","北京");41 //初始化writer对象,注意,泛型应写为:GenericRecord42 DatumWriterdw = new SpecificDatumWriter (schema);43 //初始化文件写入器,注意,泛型应写为:GenericRecord44 DataFileWriter dfw = new DataFileWriter (dw);45 //定义写入的路径46 dfw.create(schema,avro);47 for (int i = 0;i <= 10000000;i++){48 dfw.append(yinzhengjie);49 }50 System.out.printf("这是Avro序列化方式: 生成文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);51 }52 53 //通过Schema的方式序列化54 private static void AvroDeserial() throws IOException {55 long start = System.currentTimeMillis();56 
//通过文件解析schema,生成schema对象,因此需要传入emp.avsc的路径57 Schema schema = new Schema.Parser().parse(avscFile);58 //将schema变成了类似与map的对象59 GenericRecord yinzhengjie = new GenericData.Record(schema);60 61 //初始化Reader对象,注意,泛型应写为:GenericRecord62 DatumReader dr = new SpecificDatumReader (schema);63 //初始化文件阅读器,注意,泛型应写为:GenericRecord64 DataFileReader dfr = new DataFileReader (avro,dr);65 //遍历66 while (dfr.hasNext()){67 GenericRecord record = dfr.next();68 // System.out.println(record);69 }70 System.out.printf("这是Avro反序列化方式: 反序列化文件大小:[%d],用时:[%d]\n",avro.length(),System.currentTimeMillis() - start);71 }72 }73 74 /*75 以上代码执行结果如下:76 这是Avro序列化方式: 生成文件大小:[220045139],用时:[2408]77 这是Avro反序列化方式: 反序列化文件大小:[220045139],用时:[2614]78 */