Skip to content
Snippets Groups Projects
Commit 97b7c0e8 authored by Sudeep Sahadevan's avatar Sudeep Sahadevan
Browse files

Merge branch 'master' into 'master'

Adds slides for session 17

See merge request !8
parents 760a96d2 9592f4e8
No related branches found
No related tags found
1 merge request!8Adds slides for session 17
Showing
with 350 additions and 0 deletions
"INTRO"
"""
- This talk is based on static type checking (i.e. pyright, mypy);
- If you're not using static type hints, give it a shot =D
- But static types only really exist INSIDE your program;
- Anything going IN or OUT of your program can't be trusted;
- It is hard to do serialization right;
- And it is harder to write in a way that can be meaningfully
checked by pyright/mypy
"""
\ No newline at end of file
"CAVEATS"
"""
- We're focusing on correctness;
- Which can come at the expense of convenience;
- We want our code to be automatically checked;
- We may end up with repetitive code
- which is ok, because it always gets flagged if
a mistake is made;
- Dealing with old versions of serialized data is
its own can of worms;
- You have to detect old versions and try to convert them;
- We probably won't cover it today.
"""
\ No newline at end of file
# pyright: strict
import json
from dataclasses import dataclass
@dataclass
class Person:
    """A person that can be round-tripped through a JSON string."""

    name: str

    def to_json(self) -> str:
        """Return this person encoded as a JSON object string."""
        return json.dumps({"name": self.name})

    @classmethod
    def from_json(cls, data: str) -> "Person":
        """Build a Person from a JSON object string holding a ``"name"`` entry."""
        fields = json.loads(data)
        return Person(name=fields["name"])
#and this works
# Round-trip Bob through JSON and check the *deserialized* copy, so the
# assertion exercises from_json instead of re-checking the original object.
bob = Person("Bob")
serialized_bob = bob.to_json()
deserialized_bob = Person.from_json(serialized_bob)
assert deserialized_bob.name == "Bob"
# pyright: strict
import json
from dataclasses import dataclass
@dataclass
class Person:
    """Person with a newly added ``age`` field; (de)serialization not yet updated."""

    name: str
    age: int  # ADDED NEW FIELD!!

    def to_json(self) -> str:
        """Encode this person as a JSON object string (only ``name`` is written)."""
        payload = {"name": self.name}  # OOPS! where is age?
        return json.dumps(payload)

    @classmethod
    def from_json(cls, data: str) -> "Person":
        """Decode a person from a JSON string (``age`` is not passed on)."""
        fields = json.loads(data)
        return Person(name=fields["name"])  # AHA! pyright saved us here
# And we have the first insight: deserialization is safer than serialization.
# "from_json" is flagged as wrong, but "to_json" isn't
# this will break even after fixing "from_json"
# because we forgot to serialize "age"!!!!!!
# Attempt a full round trip with the two-field Person.
bob = Person(name="Bob", age=50)
serialized_bob = bob.to_json()
deserialized_bob = Person.from_json(serialized_bob)
# pyright: strict
import json
from dataclasses import dataclass
@dataclass
class Person:
    """Person serialized generically through its instance ``__dict__``."""

    name: str
    age: int

    def to_json(self) -> str:
        """Dump every instance attribute as a JSON object string."""
        # Now we can't forget any field!
        attributes = self.__dict__
        return json.dumps(attributes)

    @classmethod
    def from_json(cls, data: str) -> "Person":
        """Rebuild a Person by looking up each attribute name in the JSON object."""
        decoded = json.loads(data)
        # Hopefully this looks weird to everyone: a throwaway instance is
        # built only to discover which attribute names exist.
        template = Person("dummy", 0)
        return Person(**{
            field_name: decoded[field_name] for field_name in template.__dict__
        })
# but this works!
# Round trip through the __dict__-based (de)serializer and verify both fields.
bob = Person(name="Bob", age=50)
serialized_bob = bob.to_json()
deserialized_bob = Person.from_json(serialized_bob)
assert "Bob" == deserialized_bob.name
assert 50 == deserialized_bob.age
# pyright: strict
import json
from dataclasses import dataclass
@dataclass
class Color:
    """A color identified by its hex code string."""

    hex_code: str

    def to_json(self) -> str:
        """Dump the instance ``__dict__`` as a JSON object string."""
        attributes = self.__dict__
        return json.dumps(attributes)

    @classmethod
    def from_json(cls, data: str) -> "Color":
        """Build a Color from a JSON object string with a ``"hex_code"`` entry."""
        decoded = json.loads(data)
        return Color(decoded["hex_code"])
@dataclass
class Person:
    """Person holding a nested ``Color``, still (de)serialized via ``__dict__``."""

    name: str
    fav_color: Color  # Adds a non-primitive field!!

    def to_json(self) -> str:
        """Dump the instance ``__dict__`` as a JSON string."""
        # No errors, but WRONG. Where is Color.to_json()?
        attributes = self.__dict__
        return json.dumps(attributes)

    @classmethod
    def from_json(cls, data: str) -> "Person":
        """Rebuild a Person by copying each attribute straight from the JSON object."""
        decoded = json.loads(data)
        # Hopefully this looks weird to everyone
        template = Person("dummy", Color("#ffffff"))
        return Person(**{
            # This is super WRONG. Where is Color.from_json?
            field_name: decoded[field_name] for field_name in template.__dict__
        })
# Serialize a Person that carries a nested Color.
ser_person = Person(name="Bob", fav_color=Color(hex_code="#ffffff")).to_json()
# So __dict__ is a bad idea.
# It's clunky to use in from_json and it can't handle non-primitive types
# Let's try to fix 'from_json' first
\ No newline at end of file
from typing import Sequence, Union, Mapping
"""
Composing doesn't work well if from_json expects the raw json string.
Let's try something more flexible:
"""
# Scalar JSON values, e.g.: 1, 3.14, "hello", True, None
JsonLeafValue = Union[int, float, str, bool, None]

# JSON arrays, e.g.:
# [
#     1,
#     2.3,
#     "hello",
#     True,
#     None,
#     {"bla": 123},
#     ["another", "array", 123]
# ]
JsonArray = Sequence["JsonValue"]

# JSON objects, e.g.: {"a": 1, "b": 3.14, "c": "hello", "d": True, "e": None}
JsonObject = Mapping[str, "JsonValue"]

# Any JSON value, i.e. any of the above: a leaf, an array or an object
JsonValue = Union[JsonLeafValue, JsonArray, JsonObject]
\ No newline at end of file
# pyright: strict
from dataclasses import dataclass
from collections.abc import Mapping
from slide_06_json_types import JsonObject, JsonValue
@dataclass
class Color:
    """A color that (de)serializes to a JSON object rather than a JSON string."""

    hex_code: str

    def to_json(self) -> JsonObject:
        """Return the instance ``__dict__`` itself as the JSON object."""
        attributes = self.__dict__
        return attributes

    @classmethod
    def from_json(cls, data: JsonValue) -> "Color":
        """Build a Color from a JSON value by reading its ``"hex_code"`` entry.

        The type checks are left commented out so the slide can show what the
        type checker reports without them.
        """
        # if not isinstance(data, Mapping):           # uncomment to fix
        #     raise ValueError("expecting object")    # uncomment to fix
        raw_hex_code = data["hex_code"]
        # if not isinstance(raw_hex_code, str):       # uncomment to fix
        #     raise ValueError("Expected string")     # uncomment to fix
        return Color(hex_code=raw_hex_code)
@dataclass
class Person:
    """Person whose JSON object nests the JSON object of its favourite Color."""

    name: str
    fav_color: Color

    def to_json(self) -> JsonObject:
        """Return a JSON object with ``"name"`` and the nested ``"fav_color"``."""
        serialized_color = self.fav_color.to_json()
        return {"name": self.name, "fav_color": serialized_color}

    @classmethod
    def from_json(cls, data: JsonValue) -> "Person":
        """Build a Person from a JSON value, delegating the color to ``Color.from_json``.

        The type checks are left commented out so the slide can show what the
        type checker reports without them.
        """
        # if not isinstance(data, Mapping):           # uncomment to fix
        #     raise ValueError("Expected mapping")    # uncomment to fix
        raw_name = data["name"]
        # if not isinstance(raw_name, str):           # uncomment to fix
        #     raise ValueError("Expected string")     # uncomment to fix
        parsed_color = Color.from_json(data["fav_color"])  # Ok, NOW it's fixed
        return Person(name=raw_name, fav_color=parsed_color)
# Round trip through JSON objects rather than JSON strings.
ser_person = Person(name="Bob", fav_color=Color(hex_code="#ffffff")).to_json()
person = Person.from_json(ser_person)
\ No newline at end of file
# pyright: strict
from dataclasses import dataclass
from collections.abc import Mapping
from slide_06_json_types import JsonObject, JsonValue
@dataclass
class Person:
    """Person whose hand-written ``to_json`` has fallen behind its fields."""

    name: str
    age: int  # NEW FIELD!

    def to_json(self) -> JsonObject:
        """Return the JSON object for this person (``age`` is not included)."""
        serialized = {
            "name": self.name,
            # OOPS! Forgot to add age here!
        }
        return serialized

    @classmethod
    def from_json(cls, data: JsonValue) -> "Person":
        """Build a Person from a JSON value, validating each field's type.

        Raises:
            ValueError: if ``data`` is not a mapping, ``"name"`` is not a
                ``str`` or ``"age"`` is not an ``int``.
        """
        if not isinstance(data, Mapping):
            raise ValueError("Expected mapping")

        raw_name = data["name"]
        if not isinstance(raw_name, str):
            raise ValueError("Expected string")

        raw_age = data["age"]  # THIS WILL ALWAYS FAIL!
        if not isinstance(raw_age, int):
            raise ValueError("Expected int")

        return Person(name=raw_name, age=raw_age)
# Insight: Serialization can go out of sync with deserialization
# because serialization is not type-safe. It can output
# whatever it wants
\ No newline at end of file
# pyright: strict
from dataclasses import dataclass
import threading
from slides.slide_06_json_types import JsonObject, JsonValue
# We'll use "Message" types that only contain
# fields that MUST be serialized
@dataclass
class PersonMessage:
    """Message type holding only the fields that MUST be serialized."""

    name: str
    age: int

    def to_json(self) -> JsonObject:
        """Placeholder for the serializer.

        Raises:
            NotImplementedError: always; the implementation is not shown yet.
        """
        # NotImplementedError is the exception class; the NotImplemented
        # constant is not an exception and raising it fails with TypeError.
        raise NotImplementedError  # assume magic for now

    @classmethod
    def from_json(cls, value: JsonValue) -> "PersonMessage":
        """Placeholder for the deserializer, which yields a ``PersonMessage``.

        Raises:
            NotImplementedError: always; the implementation is not shown yet.
        """
        raise NotImplementedError  # assume magic for now
@dataclass
class Person:
    """Domain object that converts to and from its ``PersonMessage``."""

    name: str
    age: int
    lock: threading.Lock  # non-serializable field

    def to_message(self) -> PersonMessage:
        """Return the ``PersonMessage`` carrying this person's name and age."""
        # can't forget to serialize any of the fields from PersonMessage
        message = PersonMessage(name=self.name, age=self.age)
        return message

    @classmethod
    def from_message(cls, data: PersonMessage) -> "Person":
        """Build a Person from a message, creating a new lock for it."""
        # PersonMessage MUST have enough information to make the constructor happy
        fresh_lock = threading.Lock()
        return Person(name=data.name, age=data.age, lock=fresh_lock)
# Insight: Serializing to a "Message" type can help keep
# serialization and deserialization in sync
\ No newline at end of file
# pyright: strict
from abc import ABC, abstractmethod
from collections.abc import Mapping
from dataclasses import dataclass
from slides.slide_06_json_types import JsonObject, JsonValue
class Message(ABC):
    """Abstract base for message types that can be turned into a JSON value."""

    @abstractmethod
    def to_json(self) -> JsonValue:
        """Return this message as a JSON value; subclasses must override."""
        ...
# All the to_json and from_json logic here should be autogenerated:
# 1 - Make sure all dict keys are the same in to_json and from_json
# 2 - Make sure we call self.some_field.to_json() only for non-primitive types
# 3 - make sure that the types of what we are reading are what we expected
@dataclass
class ColorMessage(Message):
    """Message carrying a color's hex code."""

    hex_code: str

    def to_json(self) -> JsonObject:  # automatically generated
        """Return ``{"hex_code": ...}`` for this message."""
        serialized = {"hex_code": self.hex_code}
        return serialized

    @classmethod
    def from_json(cls, value: JsonValue) -> "ColorMessage":  # automatically generated
        """Build a ColorMessage from a JSON value, validating its shape.

        Raises:
            ValueError: if ``value`` is not a mapping or ``"hex_code"`` is
                not a ``str``.
        """
        if not isinstance(value, Mapping):
            raise ValueError("Expected mapping")

        raw_hex_code = value["hex_code"]
        if not isinstance(raw_hex_code, str):
            raise ValueError("Expected str")

        return ColorMessage(hex_code=raw_hex_code)
@dataclass
class PersonMessage(Message):
    """Message carrying a person's name and the nested ``ColorMessage``."""

    name: str
    color: ColorMessage

    def to_json(self) -> JsonObject:  # automatically generated
        """Return a JSON object with ``"name"`` and the nested ``"color"`` object."""
        return {"name": self.name, "color": self.color.to_json()}

    # from_json is an alternate constructor, so it must be a classmethod
    # (like ColorMessage.from_json); as a plain method it could only be
    # called on an already-built PersonMessage.
    @classmethod
    def from_json(cls, value: JsonValue) -> "PersonMessage":  # automatically generated
        """Build a PersonMessage from a JSON value, validating its shape.

        The nested color is delegated to ``ColorMessage.from_json``.

        Raises:
            ValueError: if ``value`` is not a mapping or ``"name"`` is not a
                ``str``.
        """
        if not isinstance(value, Mapping):
            raise ValueError("expected json object")
        name = value["name"]
        if not isinstance(name, str):
            raise ValueError("Expected string")
        color = ColorMessage.from_json(value["color"])
        return PersonMessage(name=name, color=color)
# It might feel like we moved in circles but:
# 1 - By using Json types (e.g. JsonObject, JsonLeafValue, JsonArray) we make sure
# we're getting the values of types we expected (e.g.: no str's going into int's);
#
# 2 - By using Message types we've made sure that serialization and
# deserialization are in sync: We output what we expect to get back as input.
#
# 3 - By doing code-generation we make sure that the unsafe parts
# of serialization are done automatically:
# -> No typos on strings (e.g.: can't serialize self.color as "colour");
# -> If we messed up the codegen, the type checker will still
# look at it and flag the errors (not the case with self.__dict__)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment