pycriu: images: pb2dict: perform pb<->json conversion properly

Currently we handle conversion in the simplest way: by parsing the protobuf text format representation of a pb message, splitting each line in half and storing the values, wrapped in ""-s (i.e. as strings), in a python dict. This leads to a bunch of errors, for example when converting bytes fields. It also doesn't handle types properly. This patch introduces a proper way to handle pb<->json conversion by iterating through the pb fields and properly storing them in a dict, or properly extracting values from a dict. Signed-off-by: Ruslan Kuprieiev <kupruser@gmail.com> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>

pycriu: images: pb2dict: perform pb<->json conversion properly
Currently we handle conversion in the simplest way: by parsing the protobuf text format representation of a pb message, splitting each line in half and storing the values, wrapped in ""-s (i.e. as strings), in a python dict. This leads to a bunch of errors, for example when converting bytes fields. It also doesn't handle types properly. This patch introduces a proper way to handle pb<->json conversion by iterating through the pb fields and properly storing them in a dict, or properly extracting values from a dict. Signed-off-by: Ruslan Kuprieiev <kupruser@gmail.com> Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
a9ccdccc · Ruslan Kuprieiev · Pavel Emelyanov · 8878793d · a9ccdccc
Commit a9ccdccc authored Jan 15, 2015 by Ruslan Kuprieiev Committed by Pavel Emelyanov Jan 15, 2015
Hide whitespace changes
Inline Side-by-side

Showing with 89 additions and 71 deletions

pb2dict.py pycriu/images/pb2dict.py +89 -71

No files found.
--- a/pycriu/images/pb2dict.py
+++ b/pycriu/images/pb2dict.py
-import google
-import io
+from google.protobuf.descriptor import FieldDescriptor as FD

-# pb2dict and dict2pb are using protobuf text format to
-# convert protobuf msgs to/from dictionary.
+# pb2dict and dict2pb are methods to convert pb to/from dict.
+# Inspired by:
+#   protobuf-to-dict - https://github.com/benhodgson/protobuf-to-dict
+#   protobuf-json    - https://code.google.com/p/protobuf-json/
+#   protobuf source  - https://code.google.com/p/protobuf/
+# Both protobuf-to-dict/json do not fit here because of several reasons,
+# here are some of them:
+#   - both have a common bug in treating optional field with empty
+#     repeated inside.
+#   - protobuf-json is not available in pip or in any other python
+#     repo, so it is hard to distribute and we can't rely on it.
+#   - both do not treat enums in a way we would like to. They convert
+#     protobuf enum to int, but we need a string here, because it is
+#     much more informative. BTW, protobuf text_format converts pb
+#     enums to string value too. (i.e. "march : x86_64" is better than
+#     "march : 1").
+
+
+# Python types used to cast the values of scalar pb fields, keyed by
+# field type. Used for both pb -> dict and dict -> pb conversion.
+_basic_cast = {
+	FD.TYPE_DOUBLE		: float,
+	FD.TYPE_FLOAT		: float,
+
+	# fixed/sfixed are fixed-width integer types, not floating point
+	# ones, so they are cast to integers of the matching width. Casting
+	# them to float would lose precision on 64-bit values.
+	FD.TYPE_FIXED64		: long,
+	FD.TYPE_SFIXED64	: long,
+	FD.TYPE_FIXED32		: int,
+	FD.TYPE_SFIXED32	: int,
+
+	FD.TYPE_INT64		: long,
+	FD.TYPE_UINT64		: long,
+	FD.TYPE_SINT64		: long,
+
+	FD.TYPE_INT32		: int,
+	FD.TYPE_UINT32		: int,
+	FD.TYPE_SINT32		: int,
+
+	FD.TYPE_BOOL		: bool,
+
+	FD.TYPE_STRING		: unicode
+}
+
+def _pb2dict_cast(field, value):
+	"""
+	Convert a single pb field value into its dict representation.
+	Takes a field descriptor and a value and returns the casted value.
+	"""
+	kind = field.type
+	if kind == FD.TYPE_MESSAGE:
+		# Nested messages are converted recursively.
+		return pb2dict(value)
+	if kind == FD.TYPE_BYTES:
+		# Bytes are stored base64-encoded in the dict.
+		return value.encode('base64')
+	if kind == FD.TYPE_ENUM:
+		# Enums are stored by name instead of by number.
+		enum_value = field.enum_type.values_by_number.get(value, None)
+		return enum_value.name
+	if kind not in _basic_cast:
+		raise Exception("Field(%s) has unsupported type %d" % (field.name, field.type))
+	cast = _basic_cast[kind]
+	return cast(value)

 def pb2dict(pb):
 	"""
 	Convert protobuf msg to dictionary.
 	Takes a protobuf message and returns a dict.
 	"""
-	pb_text = io.BytesIO('')
-	google.protobuf.text_format.PrintMessage(pb, pb_text)
-	pb_text.seek(0)
-	return _text2dict(pb_text)
-
-def _text2dict(pb_text):
-	"""
-	Convert protobuf text format msg to dict
-	Takes a protobuf message in text format and
-	returns a dict.
-	"""
 	d = {}
-	while True:
-		s = pb_text.readline()
-		s.strip()
-		if s == '' or '}' in s:
-			break
-
-		name, value = s.split()
-		if value == '{':
-			value = _text2dict(pb_text)
-		elif name.endswith(':'):
-			name = name[:-1]
-		else:
-			raise Exception("Unknown format" + s)
-
-		if d.get(name):
-			if not isinstance(d[name], list):
-				d[name] = [d[name]]
-			d[name].append(value)
+	# Each (field, value) pair from ListFields() becomes one dict entry
+	# keyed by the field name.
+	for field, value in pb.ListFields():
+		if field.label == FD.LABEL_REPEATED:
+			# Repeated field: cast every element and store them as a list.
+			d_val = []
+			for v in value:
+				d_val.append(_pb2dict_cast(field, v))
 		else:
-			d[name] = value
+			d_val = _pb2dict_cast(field, value)

+		d[field.name] = d_val
 	return d

+def _dict2pb_cast(field, value):
+	"""
+	Convert a single dict value into the form expected by a pb field.
+	Takes a field descriptor and a value and returns the casted value.
+	"""
+	# TYPE_MESSAGE is deliberately not considered here: repeated and
+	# non-repeated messages need special treatment and are handled
+	# separately.
+	kind = field.type
+	if kind == FD.TYPE_BYTES:
+		# Bytes are kept base64-encoded in the dict.
+		return value.decode('base64')
+	if kind == FD.TYPE_ENUM:
+		# Enums are kept by name in the dict, pb stores the number.
+		enum_value = field.enum_type.values_by_name.get(value, None)
+		return enum_value.number
+	cast = _basic_cast[kind]
+	return cast(value)
+
 def dict2pb(d, pb):
 	"""
 	Convert dictionary to protobuf msg.
 	Takes dict and protobuf message to be merged into.
 	"""
-	pb_text = io.BytesIO('')
-	_dict2text(d, pb_text, 0)
-	pb_text.seek(0)
-	s = pb_text.read()
-	google.protobuf.text_format.Merge(s, pb)
-
-def _write_struct(name, text, indent, inside):
-	"""
-	Convert "inside" dict to protobuf text format
-	wrap it inside block named "name" and write
-	it to "text".
-	"""
-	text.write(indent*" " + name.encode() + " {\n")
-	_dict2text(inside, text, indent+2)
-	text.write(indent*" " + "}\n")
-
-def _write_field(name, value, text, indent):
-	"""
-	Write "name: value" to "text".
-	"""
-	text.write(indent*" " + name.encode() + ": " + value.encode() + "\n")
-
-def _dict2text(d, pb_text, indent):
-	"""
-	Convert dict to protobuf text format.
-	Takes dict, protobuf message in text format and a number
-	of spaces to be put before each field.
-	"""
-	for name, value in d.iteritems():
-		if isinstance(value, unicode):
-			_write_field(name, value, pb_text, indent)
-		elif isinstance(value, list):
-			for x in value:
-				if isinstance(x, dict):
-					_write_struct(name, pb_text, indent, x)
+	# Driven by the fields declared in the message descriptor: fields
+	# missing from d are left untouched, and keys of d that match no
+	# declared field are never looked at.
+	for field in pb.DESCRIPTOR.fields:
+		if field.name not in d:
+			continue
+		value = d[field.name]
+		if field.label == FD.LABEL_REPEATED:
+			pb_val = getattr(pb, field.name, None)
+			for v in value:
+				if field.type == FD.TYPE_MESSAGE:
+					# Repeated message: add a new element and fill it
+					# recursively from the nested dict.
+					dict2pb(v, pb_val.add())
 				else:
-					_write_field(name, x, pb_text, indent)
+					pb_val.append(_dict2pb_cast(field, v))
 		else:
-			_write_struct(name, pb_text, indent, value)
+			if field.type == FD.TYPE_MESSAGE:
+				# SetInParent method acts just like has_* = true in C,
+				# and helps to properly treat cases when we have optional
+				# field with empty repeated inside.
+				getattr(pb, field.name).SetInParent()
+
+				dict2pb(value, getattr(pb, field.name, None))
+			else:
+				setattr(pb, field.name, _dict2pb_cast(field, value))
+	# pb has been filled in place; it is also returned to the caller.
+	return pb