FSharp.Data


Anonymizing JSON

BinderScriptNotebook

This tutorial shows how to implement an anonymizer for a JSON document (represented using the JsonValue type discussed in the JSON parser article). This functionality is not directly available in the FSharp.Data package, but it can be implemented very easily by recursively walking over the JSON document.

If you want to use the JSON anonymizer in your code, you can copy the source from GitHub and just include it in your project. If you use these functions often and would like to see them in the FSharp.Data package, please submit a feature request.

DISCLAIMER: Don't use this for sensitive data, as it's just a sample.

open System
open System.Globalization
open FSharp.Data

/// Walks a JSON document and replaces string and number values with random
/// data of the same shape: digits are replaced by random digits, lower-case
/// letters by random lower-case ASCII letters and other letters by random
/// upper-case ASCII letters. Every other character is kept as-is.
///
/// propertiesToSkip: names of record properties whose values are returned untouched.
/// valuesToSkip: string values that are returned untouched wherever they occur.
type JsonAnonymizer(?propertiesToSkip, ?valuesToSkip) = 

  let propertiesToSkip = Set.ofList (defaultArg propertiesToSkip [])
  let valuesToSkip = Set.ofList (defaultArg valuesToSkip [])

  let rng = Random()

  let digits = [| '0' .. '9' |]
  let lowerLetters = [| 'a' .. 'z' |]
  let upperLetters = [| 'A' .. 'Z' |]

  /// Picks a random element; the bound comes from the array itself so the
  /// alphabets above can change without touching any magic number.
  let pick (chars:char[]) = chars.[rng.Next(chars.Length)]

  /// Replaces a digit or letter with a random character of the same class.
  let getRandomChar (c:char) =
      if Char.IsDigit c then pick digits
      elif Char.IsLetter c then
          if Char.IsLower c then pick lowerLetters else pick upperLetters
      else c

  /// Randomizes every digit and letter of a piece of text.
  let randomize (str:string) =
      String(str.ToCharArray() |> Array.map getRandomChar)

  /// Randomizes digits only. Used for numeric text, where letters such as the
  /// exponent marker must survive for the text to stay parseable.
  let randomizeDigits (str:string) =
      String(str.ToCharArray() |> Array.map (fun c -> if Char.IsDigit c then pick digits else c))

  /// Randomizes the digits of a decimal. Formatting and parsing both use the
  /// invariant culture; if the randomized digits no longer fit in a decimal
  /// a random integer is returned instead of raising.
  let anonymizeDecimal (text:string) =
      let randomized = randomizeDigits text
      match Decimal.TryParse(randomized, NumberStyles.Number, CultureInfo.InvariantCulture) with
      | true, value -> value
      | _ -> decimal (rng.Next())

  /// Randomizes the mantissa digits of a float. The exponent part is kept so
  /// the magnitude stays comparable and the text stays parseable. NaN and
  /// infinities contain no digits and are returned unchanged.
  let anonymizeFloat (f:float) =
      if Double.IsNaN f || Double.IsInfinity f then f
      else
          let text = f.ToString("R", CultureInfo.InvariantCulture)
          let mantissa, exponent =
              match text.IndexOf('E') with
              | -1 -> text, ""
              | i -> text.Substring(0, i), text.Substring(i)
          let randomized = randomizeDigits mantissa + exponent
          match Double.TryParse(randomized, NumberStyles.Float, CultureInfo.InvariantCulture) with
          | true, value when not (Double.IsInfinity value) -> value
          // The randomized mantissa pushed the value out of range.
          | _ -> rng.NextDouble()

  let isBit (typ:Type) =
      typ = typeof<Runtime.StructuralTypes.Bit0> || 
      typ = typeof<Runtime.StructuralTypes.Bit1>

  let rec anonymize json =
      match json with
      | JsonValue.String s when valuesToSkip.Contains s -> json
      | JsonValue.String s ->
          let typ = 
            Runtime.StructuralInference.inferPrimitiveType 
              CultureInfo.InvariantCulture s

          ( if typ = typeof<Guid> then Guid.NewGuid().ToString()
            // Values inferred as bits or dates are kept unchanged.
            elif isBit typ then s
            elif typ = typeof<DateTime> then s
            else 
              // Keep the URL scheme readable; ordinal comparison avoids
              // culture-dependent prefix matching.
              let prefix, s =
                if s.StartsWith("http://", StringComparison.Ordinal) then 
                  "http://", s.Substring("http://".Length)
                elif s.StartsWith("https://", StringComparison.Ordinal) then 
                  "https://", s.Substring("https://".Length)
                else "", s
              prefix + randomize s )
          |> JsonValue.String
      | JsonValue.Number d -> 
          // Use the invariant culture for the text handed to the
          // invariant-culture type inference.
          let text = d.ToString(CultureInfo.InvariantCulture)
          let typ = 
            Runtime.StructuralInference.inferPrimitiveType 
              CultureInfo.InvariantCulture text
          if isBit typ then json
          else anonymizeDecimal text |> JsonValue.Number
      | JsonValue.Float f -> 
          anonymizeFloat f |> JsonValue.Float
      | JsonValue.Boolean _  | JsonValue.Null -> json
      | JsonValue.Record props -> 
          props 
          |> Array.map (fun (key, value) ->
              let newValue = if propertiesToSkip.Contains key then value else anonymize value
              key, newValue)
          |> JsonValue.Record
      | JsonValue.Array array -> 
          array 
          |> Array.map anonymize 
          |> JsonValue.Array

  /// Returns an anonymized copy of the given JSON value.
  member __.Anonymize json = anonymize json

// Load the sample JSON document addressed relative to this script and print it.
// NOTE(review): the path is appended to __SOURCE_DIRECTORY__ without a leading
// separator - confirm that it resolves on every platform this script runs on.
let json = JsonValue.Load (__SOURCE_DIRECTORY__ + "../../data/TwitterStream.json")
json |> printfn "%O"

// Anonymize the document, leaving the values of "lang" properties untouched,
// then print the result.
let anonymizedJson =
    let anonymizer = JsonAnonymizer ["lang"]
    anonymizer.Anonymize json
anonymizedJson |> printfn "%O"

Related articles

namespace System
namespace System.Globalization
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
Multiple items
type JsonAnonymizer =
  new : ?propertiesToSkip:string list * ?valuesToSkip:string list -> JsonAnonymizer
  member Anonymize : json:JsonValue -> JsonValue

--------------------
new : ?propertiesToSkip:string list * ?valuesToSkip:string list -> JsonAnonymizer
val propertiesToSkip : string list option
val valuesToSkip : string list option
val propertiesToSkip : Set<string>
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IReadOnlyCollection<'T>
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  ...

--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)
val defaultArg : arg:'T option -> defaultValue:'T -> 'T
val valuesToSkip : Set<string>
val rng : Random
Multiple items
type Random =
  new : unit -> unit + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer: byte [] -> unit + 1 overload
  member NextDouble : unit -> float
  member Sample : unit -> float

--------------------
Random() : Random
Random(Seed: int) : Random
val digits : char []
val lowerLetters : char []
val upperLetters : char []
val getRandomChar : (char -> char)
val c : char
Multiple items
val char : value:'T -> char (requires member op_Explicit)

--------------------
[<Struct>]
type char = Char
[<Struct>]
type Char =
  member CompareTo : value: char -> int + 1 overload
  member Equals : obj: char -> bool + 1 overload
  member GetHashCode : unit -> int
  member GetTypeCode : unit -> TypeCode
  member System.IConvertible.ToBoolean : provider: IFormatProvider -> bool
  member System.IConvertible.ToByte : provider: IFormatProvider -> byte
  member System.IConvertible.ToChar : provider: IFormatProvider -> char
  member System.IConvertible.ToDateTime : provider: IFormatProvider -> DateTime
  member System.IConvertible.ToDecimal : provider: IFormatProvider -> decimal
  member System.IConvertible.ToDouble : provider: IFormatProvider -> float
  ...
Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
Char.IsLetter(c: char) : bool
Char.IsLetter(s: string, index: int) : bool
Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool
val randomize : (string -> String)
val str : string
Multiple items
val string : value:'T -> string

--------------------
type string = String
Multiple items
type String =
  interface IEnumerable<char>
  interface IEnumerable
  interface ICloneable
  interface IComparable
  interface IComparable<string>
  interface IConvertible
  interface IEquatable<string>
  new : value: nativeptr<char> -> unit + 8 overloads
  member Clone : unit -> obj
  member CompareTo : value: obj -> int + 1 overload
  ...

--------------------
String(value: nativeptr<char>) : String
String(value: char []) : String
String(value: ReadOnlySpan<char>) : String
String(value: nativeptr<sbyte>) : String
String(c: char, count: int) : String
String(value: nativeptr<char>, startIndex: int, length: int) : String
String(value: char [], startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int) : String
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : String
String.ToCharArray() : char []
String.ToCharArray(startIndex: int, length: int) : char []
type Array =
  interface ICollection
  interface IEnumerable
  interface IList
  interface IStructuralComparable
  interface IStructuralEquatable
  interface ICloneable
  new : unit -> unit
  member Clone : unit -> obj
  member CopyTo : array: Array * index: int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  ...
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
val anonymize : (JsonValue -> JsonValue)
val json : JsonValue
type JsonValue =
  | String of string
  | Number of decimal
  | Float of float
  | Record of properties: (string * JsonValue) []
  | Array of elements: JsonValue []
  | Boolean of bool
  | Null
    member private PrepareRequest : httpMethod:string option * headers:#seq<string * string> option -> HttpRequestBody * (string * string) list * string
    member Request : url:string * ?httpMethod:string * ?headers:seq<string * string> -> HttpResponse
    member RequestAsync : url:string * ?httpMethod:string * ?headers:seq<string * string> -> Async<HttpResponse>
    member ToString : saveOptions:JsonSaveOptions -> string + 1 overload
    member WriteTo : w:TextWriter * saveOptions:JsonSaveOptions -> unit
    static member AsyncLoad : uri:string * ?encoding:Encoding -> Async<JsonValue>
    static member private JsonStringEncodeTo : w:TextWriter -> value:string -> unit
    static member Load : stream:Stream -> JsonValue + 2 overloads
    static member Parse : text:string -> JsonValue
    static member ParseMultiple : text:string -> seq<JsonValue>
    ...
union case JsonValue.String: string -> JsonValue
val s : string
member Set.Contains : value:'T -> bool
val typ : Type
Multiple items
namespace FSharp.Data.Runtime

--------------------
namespace System.Runtime
module StructuralInference

from FSharp.Data.Runtime
val inferPrimitiveType : cultureInfo:CultureInfo -> value:string -> Type
Multiple items
type CultureInfo =
  interface ICloneable
  interface IFormatProvider
  new : culture: int -> unit + 3 overloads
  member ClearCachedData : unit -> unit
  member Clone : unit -> obj
  member Equals : value: obj -> bool
  member GetConsoleFallbackUICulture : unit -> CultureInfo
  member GetFormat : formatType: Type -> obj
  member GetHashCode : unit -> int
  member ToString : unit -> string
  ...

--------------------
CultureInfo(culture: int) : CultureInfo
CultureInfo(name: string) : CultureInfo
CultureInfo(culture: int, useUserOverride: bool) : CultureInfo
CultureInfo(name: string, useUserOverride: bool) : CultureInfo
property CultureInfo.InvariantCulture: CultureInfo with get
val typeof<'T> : Type
Multiple items
[<Struct>]
type Guid =
  new : b: byte [] -> unit + 5 overloads
  member CompareTo : value: Guid -> int + 1 overload
  member Equals : g: Guid -> bool + 1 overload
  member GetHashCode : unit -> int
  member ToByteArray : unit -> byte []
  member ToString : unit -> string + 2 overloads
  member TryFormat : destination: Span<char> * charsWritten: byref<int> *?format: ReadOnlySpan<char> -> bool
  member TryWriteBytes : destination: Span<byte> -> bool
  static member NewGuid : unit -> Guid
  static member Parse : input: ReadOnlySpan<char> -> Guid + 1 overload
  ...

--------------------
Guid ()
Guid(b: byte []) : Guid
Guid(b: ReadOnlySpan<byte>) : Guid
Guid(g: string) : Guid
Guid(a: int, b: int16, c: int16, d: byte []) : Guid
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : Guid
Guid.NewGuid() : Guid
namespace FSharp.Data.Runtime.StructuralTypes
Multiple items
union case Runtime.StructuralTypes.Bit0.Bit0: Runtime.StructuralTypes.Bit0

--------------------
type Bit0 = | Bit0
Multiple items
union case Runtime.StructuralTypes.Bit1.Bit1: Runtime.StructuralTypes.Bit1

--------------------
type Bit1 = | Bit1
Multiple items
[<Struct>]
type DateTime =
  new : year: int * month: int * day: int -> unit + 10 overloads
  member Add : value: TimeSpan -> DateTime
  member AddDays : value: float -> DateTime
  member AddHours : value: float -> DateTime
  member AddMilliseconds : value: float -> DateTime
  member AddMinutes : value: float -> DateTime
  member AddMonths : months: int -> DateTime
  member AddSeconds : value: float -> DateTime
  member AddTicks : value: int64 -> DateTime
  member AddYears : value: int -> DateTime
  ...

--------------------
DateTime ()
   (+0 other overloads)
DateTime(ticks: int64) : DateTime
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Calendar) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : DateTime
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : DateTime
   (+0 other overloads)
val prefix : string
String.StartsWith(value: string) : bool
String.StartsWith(value: char) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: CultureInfo) : bool
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
union case JsonValue.Number: decimal -> JsonValue
val d : decimal
Decimal.ToString() : string
Decimal.ToString(format: string) : string
Decimal.ToString(provider: IFormatProvider) : string
Decimal.ToString(format: string, provider: IFormatProvider) : string
Multiple items
[<Struct>]
type Decimal =
  new : value: float -> unit + 8 overloads
  member CompareTo : value: decimal -> int + 1 overload
  member Equals : value: decimal -> bool + 2 overloads
  member GetHashCode : unit -> int
  member GetTypeCode : unit -> TypeCode
  member System.IConvertible.ToBoolean : provider: IFormatProvider -> bool
  member System.IConvertible.ToByte : provider: IFormatProvider -> byte
  member System.IConvertible.ToChar : provider: IFormatProvider -> char
  member System.IConvertible.ToDateTime : provider: IFormatProvider -> DateTime
  member System.IConvertible.ToDecimal : provider: IFormatProvider -> decimal
  ...

--------------------
Decimal ()
Decimal(value: float) : Decimal
Decimal(value: int) : Decimal
Decimal(bits: int []) : Decimal
Decimal(value: int64) : Decimal
Decimal(bits: ReadOnlySpan<int>) : Decimal
Decimal(value: float32) : Decimal
Decimal(value: uint32) : Decimal
Decimal(value: uint64) : Decimal
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : Decimal
Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles) : decimal
Decimal.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : decimal
Decimal.Parse(s: ReadOnlySpan<char>,?style: NumberStyles,?provider: IFormatProvider) : decimal
union case JsonValue.Float: float -> JsonValue
val f : float
Double.ToString() : string
Double.ToString(format: string) : string
Double.ToString(provider: IFormatProvider) : string
Double.ToString(format: string, provider: IFormatProvider) : string
[<Struct>]
type Double =
  member CompareTo : value: float -> int + 1 overload
  member Equals : obj: float -> bool + 1 overload
  member GetHashCode : unit -> int
  member GetTypeCode : unit -> TypeCode
  member System.IConvertible.ToBoolean : provider: IFormatProvider -> bool
  member System.IConvertible.ToByte : provider: IFormatProvider -> byte
  member System.IConvertible.ToChar : provider: IFormatProvider -> char
  member System.IConvertible.ToDateTime : provider: IFormatProvider -> DateTime
  member System.IConvertible.ToDecimal : provider: IFormatProvider -> decimal
  member System.IConvertible.ToDouble : provider: IFormatProvider -> float
  ...
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles) : float
Double.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : float
Double.Parse(s: ReadOnlySpan<char>,?style: NumberStyles,?provider: IFormatProvider) : float
union case JsonValue.Boolean: bool -> JsonValue
union case JsonValue.Null: JsonValue
union case JsonValue.Record: properties: (string * JsonValue) [] -> JsonValue
val props : (string * JsonValue) []
val key : string
val value : JsonValue
val newValue : JsonValue
union case JsonValue.Array: elements: JsonValue [] -> JsonValue
Multiple items
val array : JsonValue []

--------------------
type 'T array = 'T []
static member JsonValue.Load : reader:IO.TextReader -> JsonValue
static member JsonValue.Load : stream:IO.Stream -> JsonValue
static member JsonValue.Load : uri:string * ?encoding:Text.Encoding -> JsonValue
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
val anonymizedJson : JsonValue