Architecture Overview

Typical Data Flow of a Spooq Data Pipeline

@startuml

' Global styling: black-and-white rendering with a monospaced 18pt default font.
skinparam monochrome true
skinparam defaultFontname Bitstream Vera Sans Mono
skinparam defaultFontSize 18

' Do not draw empty attribute/method compartments on classes.
hide empty members

' The diagram title below is currently commented out.
' title Spooq2 - Data Flow Diagram

' Allow class elements and deployment-style elements (actor, database)
' to be combined in this one diagram.
allow_mixing

' The actor that starts the flow; it is linked to the pipeline's execute() further down.
actor Client

' Pipeline package: contains the pipeline object and its execute() operation.
package "Pipeline"  {
    class "Pipeline Instance" {
        execute()
    }
}

' The client is linked to the pipeline; the note on the link labels the call as execute().
' The single-dash arrow is PlantUML's short (horizontal) link form.
Client -> "Pipeline Instance"
note on link
execute()
end note

' Storage systems at the two ends of the flow, declared as database nodes.
' They are connected to the extractor and loader further down in the diagram.
database "Source System"
database "Target System"

' Extractor package: the first processing stage, reached from the pipeline.
package "Extractor"  {

    ' The pipeline is linked to the extractor. The link is declared inside this
    ' package, before the class itself is defined just below.
    "Pipeline Instance" --> "Extractor Instance"

    class "Extractor Instance" {
        extract()
    }

}

' Link between source system and extractor; the arrow head points at the extractor
' and the note labels what travels along it ("Raw Data").
"Extractor Instance" <- "Source System"
note on link
    Raw Data
end note


' Transformers package: a chain of transformer elements (1 -> 2 -> N), each
' exposing transform(input_df: DataFrame). Every link in the chain carries a
' "DataFrame" note.
'
' NOTE: the element names contain TWO spaces between "Transformer" and
' "Instance". Each reference must repeat the name exactly; a differently
' spelled name would be treated as a separate element.
package "Transformers"  {

    ' Extractor output is linked to the first transformer.
    "Extractor Instance" --> "Transformer  Instance 1"

    note on link
        DataFrame
    end note

    class "Transformer  Instance 1" {
        transform(input_df: DataFrame)
    }

    ' First transformer is linked to the second.
    "Transformer  Instance 1" --> "Transformer  Instance 2"

    note on link
        DataFrame
    end note

    class "Transformer  Instance 2" {
        transform(input_df: DataFrame)
    }

    ' Second transformer is linked to "Instance N", the last element of the chain.
    "Transformer  Instance 2" --> "Transformer  Instance N"

    note on link
        DataFrame
    end note

    class "Transformer  Instance N" {
        transform(input_df: DataFrame)
    }

}

' Loader package: the final stage, reached from the last transformer.
package "Loader" {

    ' Last transformer is linked to the loader; the note labels the link "DataFrame".
    ' This link uses the single-dash arrow, unlike the "-->" links between the earlier stages.
    "Transformer  Instance N" -> "Loader Instance"

    note on link
        DataFrame
    end note

    class "Loader Instance" {
        load(input_df: DataFrame)
    }
}

' Link between loader and target system; the arrow head points at the target
' and the note labels it "Transformed Data".
' NOTE(review): the extra-long arrow (five dashes) presumably exists to influence
' the layout distance between the two elements - confirm before shortening it.
"Target System" <----- "Loader Instance"
note on link
    Transformed Data
end note

@enduml

Simplified Class Diagram

@startuml

' Global styling: black-and-white rendering with a monospaced 18pt default font.
skinparam monochrome true
skinparam defaultFontname Bitstream Vera Sans Mono
skinparam defaultFontSize 18

' Lay the diagram out left-to-right instead of the default top-to-bottom,
' and do not draw empty attribute/method compartments.
left to right direction
hide empty members

' Commented-out outer namespace wrapper; its matching closing brace near the
' end of the diagram is commented out as well.
' namespace spooq2 {
  ' extractor namespace: Extractor is the base class; JSONExtractor and
  ' JDBCExtractor extend it, and the two JDBC variants extend JDBCExtractor.
  namespace extractor {
      
      class Extractor {
      }
      Extractor <|-- JSONExtractor
      class JSONExtractor{
      }
      Extractor <|-- JDBCExtractor
      class JDBCExtractor{
      }
      ' The next two inheritance links use the single-dash arrow form and are
      ' written in opposite directions ("<|-" and "-|>"); both still point at
      ' JDBCExtractor as the parent.
      ' NOTE(review): presumably chosen for layout - verify before normalising them.
      JDBCExtractor <|- JDBCExtractorFullLoad
      class JDBCExtractorFullLoad {
      }
      JDBCExtractorIncremental -|> JDBCExtractor
      class JDBCExtractorIncremental {
      }

  }
  ' transformer namespace: Transformer is the base class; Exploder, Mapper,
  ' NewestByGroup, Sieve and ThresholdCleaner each extend it.
  namespace transformer{
    
    class Transformer
    Transformer <|-- Exploder
    class Exploder
    Transformer <|-- Mapper
    class Mapper
    Transformer <|-- NewestByGroup
    class NewestByGroup
    Transformer <|-- Sieve
    class Sieve
    Transformer <|-- ThresholdCleaner
    class ThresholdCleaner

  }
  ' loader namespace: Loader is the base class; HiveLoader extends it.
  namespace loader {
    
    class Loader
    Loader <|-- HiveLoader
    class HiveLoader
  }
  ' pipeline namespace: Pipeline aggregates ("o--") one extractor, one or more
  ' transformers and one loader; PipelineFactory has a dotted "creates" link to Pipeline.
  namespace pipeline {
    
    class Pipeline 
    ' The trailing ">" in each label draws a small arrow showing the reading direction.
    ' NOTE(review): the link targets are the bare names extractor / transformer /
    ' loader - presumably meant to point at the sibling namespaces of the same
    ' names; confirm how the PlantUML version in use resolves them from inside
    ' this namespace.
    Pipeline o-- extractor : has 1 instance >
    Pipeline o-- transformer : has 1+ instances >
    Pipeline o-- loader : has 1 instance >

    class PipelineFactory
    PipelineFactory .> Pipeline : "creates"
  }
' Closing brace of the commented-out outer namespace wrapper opened near the top of the diagram.
' }

' Font size for relation labels. The first variant is commented out.
' NOTE(review): confirm that "ClassRelationFontSize" is a skinparam name the
' PlantUML version in use recognises - TODO verify against the skinparam list.
' skinparam ClassArrowFontSize 18
skinparam ClassRelationFontSize 30

@enduml