// UVAP · Ultinous Video Analytics Platform

/*
Structures for Kafka streams containing Ultinous video analysis results.
Copyright (C) 2014,2018 Ultinous Inc.
*/

////////////////////////////////////////////////////////////////////////////////////////////////////////
//
// This proto describes the Kafka messages created as a result of running video analysis components.
// They are all outputs of one or more components. Some of them are also input to complex components.
//
// Each record below has a time stamp and a key, in addition to the payload described by the proto message.
// The time and key are documented above each record.
//
// Some of the records below are linked by sharing the same key in order to be able to identify one with another.
// For example, AgeRecord has the same key as ObjectDetectionRecord so that each age can be assigned to
// the corresponding person when multiple persons are detected in a single frame.
//
////////////////////////////////////////////////////////////////////////////////////////////////////////

syntax = "proto3";

package ultinous.proto.kafka;

// Imports are kept together and sorted alphabetically.
import "ultinous/proto/common/kafka_common.proto";
import "ultinous/proto/common/skeleton.proto"; // Referenced by SkeletonRecord (common.SkeletonPointType).

// Java code generation settings.
option java_multiple_files = true;
option java_package = "com.ultinous.proto.kafka";

/** Result record emitted by MGR Object Detection and by the Detection Filter.

  Every detected head/face on every frame yields one instance of this record.

  An analyzed video frame produces as many detection records as there are people detected in it.
  Records of the same frame are indexed sequentially, so an index alone is unique only within that frame.
  To obtain a key that is unique for the entire video stream, the timestamp of the source video frame
  is combined with the detection index.

  A record with end_of_frame set signals that no more records will be inserted into the stream for the
  given input video frame. In such a record every other field is invalid.

  time: timestamp of the input video frame
  key: time + "_" + sequential index within frame
*/
message ObjectDetectionRecord {
  ObjectType type = 1; ///< Kind of object that was detected.
  Rect bounding_box = 2; ///< Rectangle enclosing the head/face.
  float detection_confidence = 3; ///< How confident the algorithm is that a head was detected. Range: [0..1]
  bool end_of_frame = 4; ///< When true, no further results follow for this frame and every other field of the record is invalid.
}

/** 3D rotation coordinates describing a head pose.
  See e.g. https://howthingsfly.si.edu/flight-dynamics/roll-pitch-and-yaw
  Each coordinate can be positive or negative.
  (0, 0, 0) degrees means that the head is directly facing the camera.
  unit: degrees
*/
message Orientation3D {
  float yaw = 1; ///< Yaw angle. Unit: degrees
  float roll = 2; ///< Roll angle. Unit: degrees
  float pitch = 3; ///< Pitch angle. Unit: degrees
}

/** Result record emitted by MGR Head Pose Detection.

  A detection yields one instance of this record when its head pose can be determined.

  time: timestamp of the input video frame
  key: same as the key of the corresponding ObjectDetectionRecord
*/
message HeadPose3DRecord {
  Orientation3D pose = 1; ///< Head rotation coordinates.
  bool end_of_frame = 2; ///< When true, no further results follow for this frame and every other field of the record is invalid.
}

/** Feature vector together with the id of the model that produced it and the type of features it holds.
*/
message FeatureVector {
  /** Kind of feature calculation that produced the vector. */
  enum FeatureType {
    PERSON_FACE = 0; ///< Result type of face feature calculation.
    PERSON_FULL_BODY = 1; ///< Result type of full-body feature calculation.
  }

  string model_id = 1; ///< Unique id of the model (neural network) the vector was generated by.
  repeated float feature = 2; ///< Internal, model specific feature representation.
  FeatureType type = 3; ///< Which type of features the vector represents.
}

/** Result record emitted by MGR Feature Vector Detection.
  A feature vector is an internal representation of the characteristics of a specific person's face or full body.
  Only other Ultinous software components are meant to use this record type, e.g. for face recognition.

  A detection yields one instance of this record when its feature vector can be determined.

  time: timestamp of the input video frame
  key: same as the key of the corresponding ObjectDetectionRecord
*/
message FeatureVectorRecord {
  FeatureVector features = 1; ///< Internal representation of the detected face or full body.
  bool end_of_frame = 2; ///< When true, no further results follow for this frame and every other field of the record is invalid.
}

/** Result record emitted by MGR Gender Detection.

  A detection yields one instance of this record when its gender can be determined.

  time: timestamp of the input video frame
  key: same as the key of the corresponding ObjectDetectionRecord
*/
message GenderRecord {
  /** Gender values that can be reported. */
  enum Gender {
    MALE = 0; ///< Male. As the zero value, this is also what an unset field reads as in proto3.
    FEMALE = 1; ///< Female.
  }

  Gender gender = 1; ///< Gender of the detected person.
  float confidence = 2; ///< How confident the algorithm is in the gender decision. Range: [0..1]
  bool end_of_frame = 3; ///< When true, no further results follow for this frame and every other field of the record is invalid.
}

/** Result record emitted by MGR Face Mask Detection.

  A detection yields one instance of this record when it is suitable for face mask detection.

  time: timestamp of the input video frame
  key: same as the key of the corresponding ObjectDetectionRecord
*/
message FaceMaskRecord {
  bool has_mask = 1; ///< True when the detected person is wearing a face mask.
  float confidence = 2; ///< How confident the algorithm is in the decision. Range: [0..1]
  bool end_of_frame = 3; ///< When true, no further results follow for this frame and every other field of the record is invalid.
}

/** Result record emitted by MGR Age Detection.

  A detection yields one instance of this record when its age can be determined.

  time: timestamp of the input video frame
  key: same as the key of the corresponding ObjectDetectionRecord
*/
message AgeRecord {
  uint32 age = 1; ///< Age of the detected person. Unit: years
  float confidence = 2; ///< How confident the algorithm is in the age decision. Range: [0..1]
  bool end_of_frame = 3; ///< When true, no further results follow for this frame and every other field of the record is invalid.
}

/** Result record emitted by MGR Skeleton Detection.

  Every detected person on every frame yields one instance of this record.
  A skeleton is a set of points, each labelled with a body part label.
  The points are not connected; skeleton.proto describes one possible way of connecting them.

  time: timestamp of the input video frame
  key: time + "_" + sequential index within frame
*/
message SkeletonRecord {
  /** One point of the skeleton.
  */
  message SkeletonPoint {
    float x = 1; ///< Horizontal coordinate in pixels, with sub-pixel precision.
    float y = 2; ///< Vertical coordinate in pixels, with sub-pixel precision.
    common.SkeletonPointType type = 3; ///< Type identifying a specific body part, see skeleton.proto
    float confidence = 4; ///< Confidence of the point detection. Range: [0..1]
  }

  repeated SkeletonPoint points = 1; ///< All points have different types, i.e. each body part occurs either once or not at all.
  bool end_of_frame = 2; ///< When true, no further results follow for this frame and every other field of the record is invalid.
}

/** MGR output record carrying frame information.

  Every input video frame yields one instance of this record.

  time: timestamp of the input video frame
  key: empty
*/
message FrameInfoRecord {
  uint32 columns = 1; ///< Horizontal size in pixels.
  uint32 rows = 2; ///< Vertical size in pixels.
}

/** Result record emitted by kafka_tracker.

  Every new point on every track yields one instance of this record.
  Until a track ends, it receives a new point for each new frame.
  The tracking algorithm predicts the new points of the track.
  A predicted point may or may not be confirmed by an actual detection.
  Even when confirmed, predicted points produce smoother tracks than bounding box centers.

  time: time of the frame
  key: trackStartTime_trackId
    trackId is a serial number starting from 0 when the application starts.
    Note that all TrackChangeRecords of a single track share the same key.
*/
message TrackChangeRecord {
  bool end_of_track = 1; ///< When true, no further results follow for this track and every other field of the record is invalid.
  string detection_key = 2; ///< Key of the corresponding ObjectDetectionRecord when a detection confirms the prediction. Empty otherwise.

  Point point = 3; ///< Predicted track point for the current input frame.
}

/** Structure identifying a PassEvent.
*/
message PassEventId {
  string track_key = 1; ///< Key of the TrackChangeRecord message that belongs to the track crossing the line in this event.
  uint32 serial = 2; ///< Serial number (starting from 0) of the current passage within the same track.
}

/** Describes a single passage event.
  Used by PassDetectionRecord and PassCounterRecord.
*/
message PassEvent {
  /** Direction in which the line is crossed.
    The pass line itself has a direction: it runs from its first point to its last one.
    'Left' and 'right' are understood from the viewpoint of an observer moving along the direction of the pass line.
    Analogy: left vs. right bank of a river.
  */
  enum CrossDirection {
    LR = 0; ///< Crossing from left to right.
    RL = 1; ///< Crossing from right to left.
  }

  PassEventId id = 1; ///< Identifies the current PassEvent.
  string pass_line_id = 2; ///< 'id' of the crossed PassLine in PassDetConfigRecord.
  CrossDirection cross_dir = 3; ///< Direction of the crossing.
  uint32 section_idx = 4; ///< Index (starting from 0) of the poly-line segment that has been crossed.
  Point cross_point = 5; ///< Point where the track and the pass line intersect.
}

/** Output of kafka_passdet.

  One instance of this record is generated when any of the following events happens:
  E1. Passage event (type = PASS_CANDIDATE, pass_candidate given):
    A track crosses a pass line. That is, the previous and current TrackChangeRecord points are
    on opposite sides of the pass line. The last point of the track can be either detected or
    predicted: pass_candidate.is_extrapolated is true when the pass was detected with a predicted
    track and false when it was detected with a real track. For non-predicted events there will be
    no passage realization (type = PASS_REALIZED). For predicted events there is a possibility for
    a realization event until the end of the track (type = END_OF_TRACK).
      Key is pass_line_id.
      Time is the time of the frame containing the last point of the track, i.e. the first frame after the passage.
  E2. Passage Realization event (type = PASS_REALIZED, pass_realized given):
    A prediction based PassCandidate (is_extrapolated = true) can be confirmed by a true detection.
    Such a PassCandidate can become realized until the track ends. A single track change input may
    trigger several Passage Realization events when the track contained several passages based
    on prediction.
      Key is empty.
      Time is the time of the frame containing the last point of the track, i.e. the frame containing the true detection.
  E3. End of Track event (type = END_OF_TRACK, end_of_track given):
    A track has ended. It is sent out even if the track did not generate any passage event.
      Key is empty.
      Time is the time of the TrackChangeRecord with end_of_track=true.
  E4. Heartbeat event (type = HEARTBEAT, no details):
    There is input but no message was sent in the last second.
      Key is empty.
      Time is the time of the last input.
*/
message PassDetectionRecord {
  /** Kind of event carried by the record, see E1-E4 in the description of PassDetectionRecord. */
  enum Type {
    HEARTBEAT = 0; ///< Heartbeat event (E4); no details are set.
    PASS_CANDIDATE = 1; ///< Passage event (E1); pass_candidate is given.
    PASS_REALIZED = 2; ///< Passage Realization event (E2); pass_realized is given.
    END_OF_TRACK = 3; ///< End of Track event (E3); end_of_track is given.
  }

  /** Details of a Passage event. */
  message PassCandidate {
    PassEvent pass = 1; ///< Details of the passing event
    bool is_extrapolated = 2; ///< True if pass detected with predicted track, false if detected with real track
  }

  /** Details of a Passage Realization event. */
  message PassRealized {
    PassEventId pass_event_ref = 1; ///< Identifier of an earlier PassDetectionRecord with type=PASS_CANDIDATE that is realized.
  }

  /** Details of an End of Track event. */
  message EndOfTrack {
    string track_key = 1; ///< TrackChangeRecord key
  }

  Type type = 1; ///< Event type

  oneof details ///< Not set when type = HEARTBEAT
  {
    PassCandidate pass_candidate = 2; ///< Only for type=PASS_CANDIDATE
    PassRealized pass_realized = 3; ///< Only for type=PASS_REALIZED
    EndOfTrack end_of_track = 4; ///< Only for type=END_OF_TRACK
  }
}

/** Key identifying a feature vector cluster. */
message FVClusterId {
  int64 first_detection_time = 1; ///< Time of the feature vector input record of the first observation belonging to this cluster.
  string first_detection_key = 2; ///< Key of the feature vector input record of the first observation belonging to this cluster.
  string first_detection_stream_id = 3; ///< Stream id of the first observation belonging to this cluster.
}

/** Representation of a feature vector cluster. */
message FVCluster {
  FeatureVector representative_fv = 1; ///< Feature vector representing the cluster. May differ from every input feature vector.
  uint32 num_observations = 2; ///< Count of input feature vectors aggregated into representative_fv. Must be positive.
  /** Tells whether the cluster has been realized.
  When cluster_realization in the configuration is null, this value is always true.
  A cluster is not meant to be used until is_realized is true.
  While it is false, outputs are only saved to keep track of input keys and their clusters.
  */
  bool is_realized = 3;
}

/** Event of a new feature vector being added to a cluster system.
  It occurs for each feature vector input of kafka_feature_vector_clustering.
  It occurs for each feature vector input coming from a registration stream of kafka_reid.
    The feature vector input is either registered as a new cluster or updates a stored cluster.
*/
message FVRegistrationEvent {
  FVClusterId cluster_id = 1; ///< Id stored after the registration. Can be new or updated.
  FVCluster cluster = 2; ///< Cluster stored after the registration.
  string input_stream_id = 3; ///< Stream id the current input record belongs to.
}

/** Event of several clusters being merged into a single one. */
message FVClusterMergingEvent {
  FVClusterId cluster_id = 1; ///< Id stored after the merge.
  FVCluster cluster = 2; ///< Cluster stored after the merge.
  repeated FVClusterId merged_clusters = 3; ///< The clusters that were merged.
}

/** Event of deleting a cluster.
  Provides the reason of the deletion:
  1. EXPIRED: Deleted because the expiration period has been reached.
  2. REMOVED: Deleted by a deletion message.
  3. REALIZED: Deleted because the cluster has been successfully realized.
*/
message FVClusterDeletedEvent {
  /** Why the cluster was deleted. */
  enum DeletionReason {
    EXPIRED = 0; ///< The expiration period has been reached.
    REMOVED = 1; ///< Deleted by a deletion message.
    REALIZED = 2; ///< The cluster has been successfully realized.
  }

  FVClusterId deleted_cluster = 1; ///< Id of deleted cluster.
  DeletionReason reason = 2; ///< Deletion reason.
}

/** Output of kafka_feature_vector_clustering.

  One instance of this record is generated when any of the following happens:
  1. reg_event: The feature vector input is either registered as a new cluster or updates a stored cluster.
  2. merge_event: Some of the clusters are merged into one cluster.
  3. delete_event: A cluster is deleted; the reason is given in FVClusterDeletedEvent.reason.
  4. end_of_input_record: There will be no more events for this input. This is denoted by an empty message.

  time: Time of the current input record.
  key: Key of the current input record.
*/
message FVClusterUpdateRecord
{
  /** Kind of this record. */
  enum Type
  {
    END_OF_INPUT_RECORD = 0; ///< There will be no more events for this input.
    REG_EVENT = 1; ///< See reg_event.
    MERGE_EVENT = 2; ///< See merge_event.
    DELETE_EVENT = 3; ///< See delete_event.
  }

  Type type = 1; ///< Record type.
  oneof event ///< Not set for an end_of_input_record, which is denoted by an empty message.
  {
    FVRegistrationEvent reg_event = 2; ///< Given when input is either registered as a new cluster or updates a stored cluster.
    FVClusterMergingEvent merge_event = 3; ///< Given when some of the clusters are merged into one cluster.
    FVClusterDeletedEvent delete_event = 4; ///< Given when a cluster is deleted; see FVClusterDeletedEvent.reason for the cause.
  }
}

/** Output of kafka_reid.

  kafka_reid has an internal database of feature vector clusters. Each cluster corresponds to a separate identity (person).

  One instance of this record is generated when any of the following happens:
  1. reid_event: The input from a reidentification stream is matched against the stored clusters, yielding a list of matches.
  2. reg_event: The input from a registration stream is either registered as a new cluster or updates a stored cluster.
  3. merge_event: Some of the clusters are merged into one cluster.
  4. delete_event: A cluster is deleted; the reason is given in FVClusterDeletedEvent.reason.
  5. end_of_input_record: There will be no more events for this input. This is denoted by an empty message.

  time: Time of the current input record.
  key: Key of the current input record.
*/
message ReidRecord
{
  /** Kind of this record. */
  enum Type
  {
    END_OF_INPUT_RECORD = 0; ///< There will be no more events for this input.
    REID_EVENT = 1; ///< See reid_event.
    REG_EVENT = 2; ///< See reg_event.
    MERGE_EVENT = 3; ///< See merge_event.
    DELETE_EVENT = 4; ///< See delete_event.
  }

  /** Cluster id paired with its matching score. */
  message ScoredFVClusterId
  {
    FVClusterId id = 1; ///< See FVClusterId
    float score = 2; ///< Score of matching. Range: [0..1]
  }

  /** Details of a reidentification event. */
  message ReidEvent
  {
    repeated ScoredFVClusterId match_list = 1; ///< List of matching identities, in decreasing order of scores.
    string input_stream_id = 2; ///< Stream id of the current input record.
  }

  Type type = 1; ///< Record type.
  oneof event ///< Not set for an end_of_input_record, which is denoted by an empty message.
  {
    ReidEvent reid_event = 2; ///< Given when input from a reidentification stream is matched by a list of stored clusters.
    FVRegistrationEvent reg_event = 3; ///< Given when input from a registration stream is either registered as a new cluster or updates a stored cluster.
    FVClusterMergingEvent merge_event = 4; ///< Given when some of the clusters are merged into one cluster.
    FVClusterDeletedEvent delete_event = 5; ///< Given when a cluster is deleted; see FVClusterDeletedEvent.reason for the cause.
  }
}