	{"id":61071,"date":"2021-05-27T16:04:00","date_gmt":"2021-05-27T15:04:00","guid":{"rendered":"https:\/\/www.artefact.com\/?post_type=news&#038;p=61071"},"modified":"2024-09-20T17:45:44","modified_gmt":"2024-09-20T16:45:44","slug":"the-path-to-developing-a-high-performance-demand-forecasting-model-part-2","status":"publish","type":"blog","link":"https:\/\/www.artefact.com\/de\/blog\/the-path-to-developing-a-high-performance-demand-forecasting-model-part-2\/","title":{"rendered":"Der Weg zur Entwicklung eines leistungsstarken Nachfrageprognosemodells - Teil 2"},"content":{"rendered":"<p><div class=\"fusion-fullwidth fullwidth-box fusion-builder-row-1 fusion-flex-container nonhundred-percent-fullwidth non-hundred-percent-height-scrolling article-author\" style=\"--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-background-color:#ffffff;--awb-flex-wrap:wrap;\" ><div class=\"fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap\" style=\"max-width:calc( 1440px + 20px );margin-left: calc(-20px \/ 2 );margin-right: calc(-20px \/ 2 );\"><div class=\"fusion-layout-column fusion_builder_column fusion-builder-column-0 fusion_builder_column_1_2 1_2 fusion-flex-column\" style=\"--awb-bg-size:cover;--awb-width-large:50%;--awb-margin-top-large:0px;--awb-spacing-right-large:10px;--awb-margin-bottom-large:0px;--awb-spacing-left-large:10px;--awb-width-medium:50%;--awb-order-medium:0;--awb-spacing-right-medium:10px;--awb-spacing-left-medium:10px;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:10px;--awb-spacing-left-small:10px;\"><div class=\"fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column\"><div class=\"fusion-title title fusion-title-1 fusion-sep-none fusion-title-text fusion-title-size-two\" style=\"--awb-margin-bottom-small:8px;\"><h2 class=\"fusion-title-heading 
title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:50;line-height:1.2;\">Author<\/h2><\/div><img decoding=\"async\" src=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27150%27%20height%3D%270%27%20viewBox%3D%270%200%20150%200%27%3E%3Crect%20width%3D%27150%27%20height%3D%270%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Rafaelle-Aygalenq-300x300.jpeg\" alt=\"Image\" class=\"lazyload artefact-elegant-image align-left article-author-image\" style=\"width: 150px; border-radius: 54% 46% 77% 23% \/ 74% 40% 60% 26%; overflow: hidden;\" width=\"150\" height=\"auto\" \/><div class=\"fusion-title title fusion-title-2 fusion-sep-none fusion-title-text fusion-title-size-three article-author-name-title\" style=\"--awb-margin-bottom-small:8px;\"><h3 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:20;line-height:1.2;\">Rafaelle Aygalenq<\/h3><\/div><div class=\"fusion-text fusion-text-1 article-author-description\"><p>Senior Data Scientist<\/p>\n<\/div><\/div><\/div><\/div><\/div><div class=\"fusion-fullwidth fullwidth-box fusion-builder-row-2 fusion-flex-container nonhundred-percent-fullwidth non-hundred-percent-height-scrolling\" style=\"--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-margin-top:40px;--awb-margin-bottom:40px;--awb-flex-wrap:wrap;\" ><div class=\"fusion-builder-row fusion-row fusion-flex-align-items-center fusion-flex-justify-content-center fusion-flex-content-wrap\" style=\"max-width:calc( 1440px + 20px );margin-left: calc(-20px \/ 2 );margin-right: calc(-20px \/ 2 );\"><div class=\"fusion-layout-column fusion_builder_column fusion-builder-column-1 fusion_builder_column_1_1 1_1 fusion-flex-column fusion-flex-align-self-center\" 
style=\"--awb-padding-top:40px;--awb-padding-right:40px;--awb-padding-bottom:40px;--awb-padding-left:40px;--awb-overflow:hidden;--awb-bg-position:left center;--awb-bg-size:cover;--awb-border-color:rgba(10,17,40,0.1);--awb-border-style:solid;--awb-border-radius:4px 4px 4px 4px;--awb-width-large:100%;--awb-margin-top-large:0px;--awb-spacing-right-large:10px;--awb-margin-bottom-large:0px;--awb-spacing-left-large:10px;--awb-width-medium:100%;--awb-order-medium:0;--awb-spacing-right-medium:10px;--awb-spacing-left-medium:10px;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:10px;--awb-spacing-left-small:10px;\"><div class=\"fusion-column-wrapper lazyload fusion-column-has-shadow fusion-flex-justify-content-center fusion-content-layout-column fusion-column-has-bg-image\" data-bg-url=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/03\/background.jpg\" data-bg=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/03\/background.jpg\"><div class=\"fusion-image-element\" style=\"text-align:center;--awb-margin-right:20px;--awb-margin-left:20px;--awb-max-width:150px;--awb-caption-title-font-family:var(--h2_typography-font-family);--awb-caption-title-font-weight:var(--h2_typography-font-weight);--awb-caption-title-font-style:var(--h2_typography-font-style);--awb-caption-title-size:var(--h2_typography-font-size);--awb-caption-title-transform:var(--h2_typography-text-transform);--awb-caption-title-line-height:var(--h2_typography-line-height);--awb-caption-title-letter-spacing:var(--h2_typography-letter-spacing);\"><span class=\" fusion-imageframe imageframe-none imageframe-1 hover-type-none\"><img decoding=\"async\" width=\"72\" height=\"41\" title=\"medium\" src=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%2772%27%20height%3D%2741%27%20viewBox%3D%270%200%2072%2041%27%3E%3Crect%20width%3D%2772%27%20height%3D%2741%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" 
data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/03\/medium.png\" alt class=\"lazyload img-responsive wp-image-60927\"\/><\/span><\/div><div class=\"fusion-title title fusion-title-3 fusion-sep-none fusion-title-center fusion-title-text fusion-title-size-three\" style=\"--awb-margin-top:20px;--awb-margin-bottom:0px;--awb-margin-bottom-small:8px;\"><h3 class=\"fusion-title-heading title-heading-center fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:20;line-height:1.2;\">Medium Blog by Artefact.<\/h3><\/div><div class=\"fusion-text fusion-text-2\" style=\"--awb-content-alignment:center;\"><p>This article was initially published on <strong>Medium.com<\/strong>.<br \/>\nFollow us on our Medium Blog !<\/p>\n<\/div><div style=\"text-align:center;\"><a class=\"fusion-button button-flat button-medium button-default fusion-button-default button-1 fusion-button-default-span fusion-button-default-type\" target=\"_blank\" rel=\"noopener noreferrer\" href=\"https:\/\/medium.com\/\"><span class=\"fusion-button-text awb-button__text awb-button__text--default\">Read Our Article<\/span><\/a><\/div><\/div><\/div><\/div><\/div><div class=\"fusion-fullwidth fullwidth-box fusion-builder-row-3 fusion-flex-container nonhundred-percent-fullwidth non-hundred-percent-height-scrolling\" style=\"--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;\" ><div class=\"fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap\" style=\"max-width:calc( 1440px + 20px );margin-left: calc(-20px \/ 2 );margin-right: calc(-20px \/ 2 );\"><div class=\"fusion-layout-column fusion_builder_column fusion-builder-column-2 fusion_builder_column_1_1 1_1 fusion-flex-column\" 
style=\"--awb-bg-size:cover;--awb-width-large:100%;--awb-margin-top-large:0px;--awb-spacing-right-large:10px;--awb-margin-bottom-large:0px;--awb-spacing-left-large:10px;--awb-width-medium:100%;--awb-order-medium:0;--awb-spacing-right-medium:10px;--awb-spacing-left-medium:10px;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:10px;--awb-spacing-left-small:10px;\"><div class=\"fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column\"><div class=\"fusion-text fusion-text-3 description\"><p><strong>5 tips to better take promotional data into account<\/strong><br \/>\n<strong>TL;DR<\/strong><br \/>\nIn this following article of a large series of posts dedicated to demand forecasting, we will focus on how to model promotions, a key driver in sales forecasting, looking at what a typical promo dataset looks like, how features should be crafted, with a step-by-step example in Python, and how complex promotional granularity can be dealt with.<\/p>\n<\/div><\/div><\/div><\/div><\/div><article class=\"fusion-fullwidth fullwidth-box fusion-builder-row-4 fusion-flex-container nonhundred-percent-fullwidth non-hundred-percent-height-scrolling\" style=\"--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;\" ><div class=\"fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-justify-content-center fusion-flex-content-wrap\" style=\"max-width:calc( 1440px + 20px );margin-left: calc(-20px \/ 2 );margin-right: calc(-20px \/ 2 );\"><div class=\"fusion-layout-column fusion_builder_column fusion-builder-column-3 fusion_builder_column_1_1 1_1 fusion-flex-column\" 
style=\"--awb-bg-size:cover;--awb-width-large:100%;--awb-margin-top-large:0px;--awb-spacing-right-large:10px;--awb-margin-bottom-large:0px;--awb-spacing-left-large:10px;--awb-width-medium:100%;--awb-order-medium:0;--awb-spacing-right-medium:10px;--awb-spacing-left-medium:10px;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:10px;--awb-spacing-left-small:10px;\"><div class=\"fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column\"><div class=\"fusion-title title fusion-title-4 fusion-sep-none fusion-title-text fusion-title-size-two\" style=\"--awb-margin-bottom-small:8px;\"><h2 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:50;line-height:1.2;\">Context<\/h2><\/div><div class=\"fusion-text fusion-text-4\"><p>When forecasting demand as retailers, we often can leverage several useful data sources such as historical sell-in, product and customer hierarchies, holidays, sell-out and promotions. It is important to pay particular attention to the latter as, in real life, promotional activity is often much more than just a dummy flag you should add as a feature to your model. It is a really complex business mechanism which can yield additional performance to your model if processed well.<\/p>\n<\/div><div class=\"fusion-title title fusion-title-5 fusion-sep-none fusion-title-text fusion-title-size-two\" style=\"--awb-margin-bottom-small:8px;\"><h2 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:50;line-height:1.2;\">Tips 1: Understanding promotions referentials<\/h2><\/div><div class=\"fusion-text fusion-text-5\"><p>Sales datasets often do not include promo data. You need to plug in a specific referential into your training data. 
Promotional data often come as a set of columns, in the format of a promotion plan with promotion characteristics such as:<\/p>\n<\/div><ul style=\"--awb-line-height:27.2px;--awb-icon-width:27.2px;--awb-icon-height:27.2px;--awb-icon-margin:11.2px;--awb-content-margin:38.4px;\" class=\"fusion-checklist fusion-checklist-1 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>SKU concerned by the promotion<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Retailer benefiting from the promotion<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Start and end dates<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Promotion ID (<em>unique ID for identifying a promotion for a given SKU, retailer, start date, end date<\/em>)<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Promotion type (<em>e.g. 
3 for 2<\/em>)<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-text fusion-text-6\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61084 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-promotion-data.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-promotion-data.png\" alt=\"\" width=\"625\" height=\"216\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27625%27%20height%3D%27216%27%20viewBox%3D%270%200%20625%20216%27%3E%3Crect%20width%3D%27625%27%20height%3D%27216%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-promotion-data-200x69.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-promotion-data-300x104.png 300w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-promotion-data-400x138.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-promotion-data-600x207.png 600w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-promotion-data.png 625w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 625px) 100vw, 625px\" \/><\/p>\n<p style=\"text-align: center;\">Example of promotion data<\/p>\n<\/div><div class=\"fusion-text fusion-text-7\"><p>Before starting the feature engineering and modeling parts, it is recommended to perform interviews with the business owners in order to understand how promotions are created and handled. Let\u2019s take the example of dates. 
In the case of a sell-in prediction, a promotion will be associated with several dates:<\/p>\n<\/div><ul style=\"--awb-line-height:27.2px;--awb-icon-width:27.2px;--awb-icon-height:27.2px;--awb-icon-margin:11.2px;--awb-content-margin:38.4px;\" class=\"fusion-checklist fusion-checklist-2 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>official start and end dates for shipping orders from the distributor to the retailers (1)<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>and official start and end dates of promotions set up in stores by retailers (2)<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-text fusion-text-8\"><p>It is really important to understand which dates impact the target variable the most and to check with the business owners if there are any specificities to take into account (e.g. if some retailers may apply the promotions before or after the official start and end dates).<\/p>\n<\/div><div class=\"fusion-text fusion-text-9\"><p>Performing Exploratory Data Analysis (EDA) can help you understand the fluctuations and the impact promotions can have on the target variable. 
For example, below one can see that a particular type of promotion has a greater impact than others.<\/p>\n<\/div><div class=\"fusion-text fusion-text-10\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61083 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-the-impact-of-two-different-promotions-on-the-sales-of-a-given-product.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-the-impact-of-two-different-promotions-on-the-sales-of-a-given-product.png\" alt=\"\" width=\"488\" height=\"235\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27488%27%20height%3D%27235%27%20viewBox%3D%270%200%20488%20235%27%3E%3Crect%20width%3D%27488%27%20height%3D%27235%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-the-impact-of-two-different-promotions-on-the-sales-of-a-given-product-200x96.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-the-impact-of-two-different-promotions-on-the-sales-of-a-given-product-300x144.png 300w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-the-impact-of-two-different-promotions-on-the-sales-of-a-given-product-400x193.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-the-impact-of-two-different-promotions-on-the-sales-of-a-given-product.png 488w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 488px) 100vw, 488px\" \/><\/p>\n<p style=\"text-align: center;\">Example of the impact of two different promotions on the sales of a given product<\/p>\n<\/div><div class=\"fusion-text fusion-text-11\"><p>EDA can also be performed to validate insights previously highlighted by business teams. 
Below, it seems retailers are starting promotions well before the official start date.<\/p>\n<\/div><div class=\"fusion-text fusion-text-12\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61082 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-difference-between-official-and-real-start-date-of-promotions.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-difference-between-official-and-real-start-date-of-promotions.png\" alt=\"\" width=\"467\" height=\"253\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27467%27%20height%3D%27253%27%20viewBox%3D%270%200%20467%20253%27%3E%3Crect%20width%3D%27467%27%20height%3D%27253%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-difference-between-official-and-real-start-date-of-promotions-200x108.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-difference-between-official-and-real-start-date-of-promotions-300x163.png 300w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-difference-between-official-and-real-start-date-of-promotions-400x217.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-difference-between-official-and-real-start-date-of-promotions.png 467w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 467px) 100vw, 467px\" \/><\/p>\n<p style=\"text-align: center;\">Example of difference between official and real start date of promotions<\/p>\n<\/div><div class=\"fusion-text fusion-text-13\"><p>After the exploration, we need to process the raw promotion data in order for it to be used. The dates have first to be expanded and processed in order to have a continuous timeline. 
Some of them may need to be shifted based on what was found during the exploration and business interview phases.<\/p>\n<\/div><div class=\"fusion-text fusion-text-14\"><p style=\"text-align: center;\"><img decoding=\"async\" class=\"lazyload alignnone wp-image-61081 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Illustration-of-promotion-data-before-and-after-preprocessing.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Illustration-of-promotion-data-before-and-after-preprocessing.png\" alt=\"\" width=\"700\" height=\"177\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27700%27%20height%3D%27177%27%20viewBox%3D%270%200%20700%20177%27%3E%3Crect%20width%3D%27700%27%20height%3D%27177%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Illustration-of-promotion-data-before-and-after-preprocessing-200x51.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Illustration-of-promotion-data-before-and-after-preprocessing-300x76.png 300w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Illustration-of-promotion-data-before-and-after-preprocessing-400x101.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Illustration-of-promotion-data-before-and-after-preprocessing-600x152.png 600w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Illustration-of-promotion-data-before-and-after-preprocessing.png 700w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 700px) 100vw, 700px\" \/><\/p>\n<p style=\"text-align: center;\">Illustration of promotion data before and after preprocessing<\/p>\n<\/div><div class=\"fusion-title title fusion-title-6 fusion-sep-none fusion-title-text fusion-title-size-two\" style=\"--awb-margin-bottom-small:8px;\"><h2 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" 
style=\"margin:0;--fontSize:50;line-height:1.2;\">Tips 2: Choosing the right granularity<\/h2><\/div><div class=\"fusion-text fusion-text-15\"><p>As your EDA will show, promotion impacts differ greatly across products, retailers (or stores if you\u2019re working with sell-out data) and types of promotions. Ideally, you\u2019d like to be as precise as possible and keep a SKU x retailer x promotion type granularity.<\/p>\n<\/div><div class=\"fusion-text fusion-text-16\"><p>For example, you may have two different geographic granularities: retailer and warehouse (i.e. a warehouse contains several retailers). By plotting your time-series at each granularity, you may find out that the impact of promotions is really visible at retailer level but seems smoothed for warehouses. This can be explained by the fact that not all retailers of the warehouse are impacted by the promotion in the same way. Therefore, in that example, it is preferable to work at the retailer granularity.<\/p>\n<\/div><div class=\"fusion-text fusion-text-17\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61080 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Difference-of-the-promotion-impact-depending-on-the-granularity.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Difference-of-the-promotion-impact-depending-on-the-granularity.png\" alt=\"\" width=\"700\" height=\"273\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27700%27%20height%3D%27273%27%20viewBox%3D%270%200%20700%20273%27%3E%3Crect%20width%3D%27700%27%20height%3D%27273%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Difference-of-the-promotion-impact-depending-on-the-granularity-200x78.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Difference-of-the-promotion-impact-depending-on-the-granularity-300x117.png 
300w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Difference-of-the-promotion-impact-depending-on-the-granularity-400x156.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Difference-of-the-promotion-impact-depending-on-the-granularity-600x234.png 600w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Difference-of-the-promotion-impact-depending-on-the-granularity.png 700w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 700px) 100vw, 700px\" \/><\/p>\n<p style=\"text-align: center;\">Difference of the promotion impact depending on the granularity<\/p>\n<\/div><div class=\"fusion-text fusion-text-18\"><p>Once the EDA has been done and the promotion data is at the right granularity, the objective is to create the most relevant features for future planned promotions for which we want to predict the associated sales.<\/p>\n<\/div><div class=\"fusion-title title fusion-title-7 fusion-sep-none fusion-title-text fusion-title-size-two\" style=\"--awb-margin-bottom-small:8px;\"><h2 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:50;line-height:1.2;\">Tips 3: Crafting the right features<\/h2><\/div><div class=\"fusion-text fusion-text-19\"><p>People may think that just adding a dummy variable in your training dataset will be enough. This works to guide the model in understanding why demand or sales is higher at one point in time. However, it is a really poor way to model how promotion may impact sales. 
Typically, some promotion types may be more efficient than others, the impact of promotions may also be higher near the start date, lower after (as there are few people left that could benefit from the reduction).<\/p>\n<\/div><div class=\"fusion-text fusion-text-20\"><p>A more sophisticated feature we found useful when using boosting algorithms is to compute sales rolling means so as to give your model insights about how much each promo type was successful in the past.<\/p>\n<\/div><div class=\"fusion-title title fusion-title-8 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-bottom-small:8px;\"><h3 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:20;line-height:1.2;\">1. Theory<\/h3><\/div><div class=\"fusion-text fusion-text-21\"><p>The idea behind this kind of feature is to measure, for a given promotion, the average volume recently generated by \u201csimilar promotions\u201d in the past. We are going to compute the average historical sell-in on a similar scope (same promotion type, same SKU, same retailer) on a rolling window with a given horizon (e.g. 
over the past 7 days).<\/p>\n<\/div><div class=\"fusion-text fusion-text-22\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61079 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-a-rolling-mean-with-a-7-days-window.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-a-rolling-mean-with-a-7-days-window.png\" alt=\"\" width=\"649\" height=\"210\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27649%27%20height%3D%27210%27%20viewBox%3D%270%200%20649%20210%27%3E%3Crect%20width%3D%27649%27%20height%3D%27210%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-a-rolling-mean-with-a-7-days-window-200x65.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-a-rolling-mean-with-a-7-days-window-300x97.png 300w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-a-rolling-mean-with-a-7-days-window-400x129.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-a-rolling-mean-with-a-7-days-window-600x194.png 600w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-of-a-rolling-mean-with-a-7-days-window.png 649w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 649px) 100vw, 649px\" \/><\/p>\n<p style=\"text-align: center;\">Example of a rolling mean with a 7-days window<\/p>\n<\/div><div class=\"fusion-text fusion-text-23\"><p><strong>For this kind of feature, particular attention should be paid to data leakage, especially when setting the time horizon.<\/strong><\/p>\n<\/div><div class=\"fusion-title title fusion-title-9 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-bottom-small:8px;\"><h3 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" 
style=\"margin:0;--fontSize:20;line-height:1.2;\">2. Python implementation<\/h3><\/div><div class=\"fusion-text fusion-text-24\"><p>Let\u2019s see how to implement a 7-days rolling mean feature step by step in Python. First of all, let\u2019s define our DataFrame with the following information:<\/p>\n<\/div><ul style=\"--awb-line-height:27.2px;--awb-icon-width:27.2px;--awb-icon-height:27.2px;--awb-icon-margin:11.2px;--awb-content-margin:38.4px;\" class=\"fusion-checklist fusion-checklist-3 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>SKU<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Retailer<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Promotion type<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Promotion ID<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Date<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Sell-in (<em>our target variable<\/em>)<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-text fusion-text-25\"><div 
class=\"code\">\n<p># Initialize our example dataframe with 6 columns: sku, retailer, promotion type,<br \/>\n# promotion id, date, sellin<br \/>\ndf = pd.DataFrame(<\/p>\n<p>)<\/p>\n<p># Initialize our horizon: 7-days rolling mean<br \/>\nhorizon = 7<\/p>\n<p># Add a line \"in the future\" for which we want to forecast the sell-in (unknown for<br \/>\n# now) and therefore for which we want to have a value for the rolling mean feature<br \/>\ndf = df.append(<br \/>\n,<br \/>\nignore_index=True<br \/>\n)<\/p>\n<\/div>\n<\/div><div class=\"fusion-text fusion-text-26\"><p>Once created, our DataFrame looks like this:<\/p>\n<\/div><div class=\"fusion-text fusion-text-27\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61078 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Initial-DataFrame.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Initial-DataFrame.png\" alt=\"\" width=\"386\" height=\"624\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27386%27%20height%3D%27624%27%20viewBox%3D%270%200%20386%20624%27%3E%3Crect%20width%3D%27386%27%20height%3D%27624%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Initial-DataFrame-186x300.png 186w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Initial-DataFrame-200x323.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Initial-DataFrame.png 386w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 386px) 100vw, 386px\" \/><\/p>\n<p style=\"text-align: center;\">Initial DataFrame<\/p>\n<\/div><div class=\"fusion-text fusion-text-28\"><p>Next we will create two important columns : the start date of promotion and the rolling mean (<em>empty for now<\/em>).<\/p>\n<\/div><div class=\"fusion-text fusion-text-29\"><div class=\"code\">\n<p># We create two new columns:<br \/>\n# \u2013 the minimum 
promo date (start date based on promotion ID)<br \/>\ndf = df.merge(<br \/>\ndf.groupby([\"sku\", \"retailer\", \"promotion_id\"]).date.min()<br \/>\n.reset_index()<br \/>\n.rename(columns=),<br \/>\non=[\"sku\", \"retailer\", \"promotion_id\"],<br \/>\nhow=\"left\"<br \/>\n)<br \/>\ndf = df.sort_values(\"min_promo_date\")<\/p>\n<p># \u2013 the rolling mean feature, filled with NaN for the moment<br \/>\ndf['promo_rolling_mean'] = np.nan<\/p>\n<\/div>\n<\/div><div class=\"fusion-text fusion-text-30\"><p>Now, the DataFrame should look like this:<\/p>\n<\/div><div class=\"fusion-text fusion-text-31\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61077 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Head-of-the-DataFrame-with-the-two-new-columns.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Head-of-the-DataFrame-with-the-two-new-columns.png\" alt=\"\" width=\"593\" height=\"146\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27593%27%20height%3D%27146%27%20viewBox%3D%270%200%20593%20146%27%3E%3Crect%20width%3D%27593%27%20height%3D%27146%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Head-of-the-DataFrame-with-the-two-new-columns-200x49.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Head-of-the-DataFrame-with-the-two-new-columns-300x74.png 300w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Head-of-the-DataFrame-with-the-two-new-columns-400x98.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Head-of-the-DataFrame-with-the-two-new-columns.png 593w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 593px) 100vw, 593px\" \/><\/p>\n<p style=\"text-align: center;\">Head of the DataFrame with the two new columns<\/p>\n<\/div><div class=\"fusion-text fusion-text-32\"><p>From there, we can start filling 
in the promo_rolling_mean column. Remember that the goal is to compute the mean of sell-in of previous similar promotions but <strong>the notion of similarity can be tricky<\/strong>. In the best case scenario, we have in our history a promotion with the same type, for the same retailer, for the same SKU. In the worst case scenario, there is a new promotion with a new type for which we do not have any history for any SKU, any retailer. Therefore the idea is to define several levels of granularity for which we will see if we have a history and therefore a possibility to calculate a rolling mean, starting from the most granular level (e.g. SKU x retailer x promotion type) to the least granular level (e.g. SKU).<\/p>\n<\/div><div class=\"fusion-text fusion-text-33\"><p>For example, let\u2019s take SKU : 1, retailer : A, promotion type : 1, date : 2020\u201301\u201301. We look for a similar promotion in the past. Lucky for us, there has been a promotion with the same promotion type, for the same SKU, the same retailer (i.e. the most granular level) in 2019 (promotion_id = \u2018A1\u20132019\u2019). Thus we will take the mean of the sell-in for the 7 most recent dates where this kind of promotion happened. In other cases we may not find any match for this granularity so we will be looking for a match by SKU and promotion type only. 
Again, if there is no match, we will finally take the mean at the SKU level only.<\/p>\n<\/div><div class=\"fusion-text fusion-text-34\"><div class=\"code\">\n<p># Definition of granularity levels to compute the rolling means, from the most granular<br \/>\n# to the least granular<br \/>\nAGG_LEVELS = <\/p>\n<p># We iterate on the granularity levels (from the most granular to the least granular) in<br \/>\n# order to compute the rolling mean on the most similar promotion for each row<br \/>\nfor agg_level_number, agg_level_columns in AGG_LEVELS.items():<\/p>\n<p># Once the rolling mean feature is filled, we break from the loop<br \/>\nif df[\"promo_rolling_mean\"].isna().sum() == 0:<br \/>\nbreak<\/p>\n<p># (1) We aggregate our dataframe to the current granularity level<br \/>\nagg_level_df = df.groupby([\"promotion_id\"] + agg_level_columns)<br \/>\n.agg()<br \/>\n.reset_index()<br \/>\n.rename(columns=)<br \/>\n.dropna(subset=[\"sellin\"])<br \/>\n.sort_values(\"min_promo_date\")<\/p>\n<p># (2) We compute the rolling mean on the given horizon for the current granularity<br \/>\n# level<br \/>\nagg_level_df[\"sellin\"] = agg_level_df.groupby(agg_level_columns)<br \/>\n.rolling(horizon, 1)[\"sellin\"]<br \/>\n.mean()<br \/>\n.droplevel(<br \/>\nlevel=list(<br \/>\nrange(len(agg_level_columns))<br \/>\n)<br \/>\n)<\/p>\n<p># (3) We merge the results with the main dataframe on the right columns and min promo<br \/>\n# date. 
We use the merge_asof to only take rolling means computed for dates before each<br \/>\n# observation date.<br \/>\ndf = pd.merge_asof(<br \/>\ndf,<br \/>\nagg_level_df,<br \/>\nby=agg_level_columns,<br \/>\non=\"min_promo_date\",<br \/>\ndirection=\"backward\",<br \/>\nsuffixes=(None, f\"_{agg_level_number}\"),<br \/>\nallow_exact_matches=False<br \/>\n)<\/p>\n<p># We fill the feature with the rolling mean values for the current granularity level<br \/>\ndf[\"promo_rolling_mean\"] = df[\"promo_rolling_mean\"].fillna(<br \/>\ndf[f\"sellin_{agg_level_number}\"])<\/p>\n<p>cols_to_keep = [<br \/>\n\"sku\", \"retailer\", \"promotion_type\", \"promotion_id\", \"date\",<br \/>\n\"sellin\", \"promo_rolling_mean\"<br \/>\n]<\/p>\n<p>df = df[cols_to_keep].sort_values(<br \/>\nby=['sku', 'retailer', 'promotion_type', 'promotion_id', 'date'])<\/p>\n<\/div>\n<\/div><div class=\"fusion-text fusion-text-35\"><p>At the end of the for loop, as you merge this new feature with the train set, you must pay attention to data leakage and only take rolling means computed for dates before each observation date (the date for which you want to make forecasts). Here, we have decided to use the merge_asof method. This allows us to merge two datasets avoiding exact matches. 
The idea behind it is: do not take the exact date match (with the allow_exact_matches=False parameter), but take the previous ones (with the direction=\u201dbackward\u201d parameter).<\/p>\n<\/div><div class=\"fusion-text fusion-text-36\"><p>Here is what our dataset looks like with the rolling mean feature filled after this step:<\/p>\n<\/div><div class=\"fusion-text fusion-text-37\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61076 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Final-DataFrame-with-the-rolling-mean-feature.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Final-DataFrame-with-the-rolling-mean-feature.png\" alt=\"\" width=\"502\" height=\"628\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27502%27%20height%3D%27628%27%20viewBox%3D%270%200%20502%20628%27%3E%3Crect%20width%3D%27502%27%20height%3D%27628%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Final-DataFrame-with-the-rolling-mean-feature-200x250.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Final-DataFrame-with-the-rolling-mean-feature-240x300.png 240w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Final-DataFrame-with-the-rolling-mean-feature-400x500.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Final-DataFrame-with-the-rolling-mean-feature.png 502w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 502px) 100vw, 502px\" \/><\/p>\n<p style=\"text-align: center;\">Final DataFrame with the rolling mean feature<\/p>\n<\/div><div class=\"fusion-text fusion-text-38\"><p>We can first see that there are some missing values for the rolling mean feature at the top of the DataFrame. 
This is normal and is due to the fact that for the first rows, we do not have any history on any SKU, any retailer, therefore no possibility to compute a rolling mean. This is the only case where the rolling mean will be empty, any other case can be handled by the definition of the granularity aggregation levels.<\/p>\n<\/div><div class=\"fusion-text fusion-text-39\"><p>For example, for the specific row we have defined at the beginning (SKU: 1, Retailer A, Promotion type: 3, Date: 2020\u201306\u201301), the one for which we don\u2019t know the sell-in yet, the rolling mean value will be the mean of the sell-in for the most similar and recent promotion. In our case, there is no history for the promotion type 3 for the retailer A, but there is for retailer B. Therefore, the rolling mean value will be the mean of sell-in for SKU=1, Promotion type= 3, Retailer=B, here: mean([134, 146]) = 140.<\/p>\n<\/div><div class=\"fusion-text fusion-text-40\"><p><img decoding=\"async\" class=\"lazyload wp-image-61075 size-full aligncenter\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-for-a-new-promotion-type-for-a-couple-SKU-x-retailer.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-for-a-new-promotion-type-for-a-couple-SKU-x-retailer.png\" alt=\"\" width=\"504\" height=\"106\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27504%27%20height%3D%27106%27%20viewBox%3D%270%200%20504%20106%27%3E%3Crect%20width%3D%27504%27%20height%3D%27106%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-for-a-new-promotion-type-for-a-couple-SKU-x-retailer-200x42.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-for-a-new-promotion-type-for-a-couple-SKU-x-retailer-300x63.png 300w, 
https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-for-a-new-promotion-type-for-a-couple-SKU-x-retailer-400x84.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Example-for-a-new-promotion-type-for-a-couple-SKU-x-retailer.png 504w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 504px) 100vw, 504px\" \/><\/p>\n<p style=\"text-align: center;\">Example for a new promotion type for a couple SKU x retailer<\/p>\n<\/div><div class=\"fusion-text fusion-text-41\"><p>This logic can be extended to several other cases that can be encountered in this type of project. For example, an additional level could be created which would be the product family and which could be used in the case where there is no history for a given product. In that case we will take the average based on products belonging to the same family. It is therefore important to think about these levels of granularity and prioritize them according to your own definition of \u201cpromotion similarity\u201d, which can be based on your EDA or business insights, for example.<\/p>\n<\/div><div class=\"fusion-text fusion-text-42\"><p>Instead of the rolling means, you can also measure the promotional uplift (i.e. the additional volume generated by a given promotion) for a given product and a given customer. The idea would be to compute a ratio between the sales during a given promotion and the sales without any promotion.<\/p>\n<\/div><div class=\"fusion-title title fusion-title-10 fusion-sep-none fusion-title-text fusion-title-size-two\" style=\"--awb-margin-bottom-small:8px;\"><h2 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:50;line-height:1.2;\">Tips 4: Dealing with big data<\/h2><\/div><div class=\"fusion-text fusion-text-43\"><p>Working at such a level of granularity can drastically increase complexity and the need for computational power. 
If, like us, you are dealing with hundreds of SKUs, retailers and several years of daily historical sell-in, it will be essential to find a way to parallelize computations. For example, if you have no need to get any information from other SKUs for your promotion features, you can partition your data on the sku column and use distributed computing. We found it useful to use Dask for this task:<\/p>\n<\/div><div class=\"fusion-text fusion-text-44\"><div class=\"code\">\n<p>from dask import delayed, compute<\/p>\n<p>def compute_rolling_mean(df):<br \/>\n...<br \/>\nreturn df<\/p>\n<p>skus_list = set(df['sku'])<br \/>\ndfs_with_promo = [<br \/>\ndelayed(compute_rolling_mean)(df.loc[df.sku == sku]) for sku in skus_list<br \/>\n]<br \/>\ndf_final = pd.concat(compute(*dfs_with_promo), axis=0, ignore_index=True)<\/p>\n<\/div>\n<\/div><div class=\"fusion-title title fusion-title-11 fusion-sep-none fusion-title-text fusion-title-size-two\" style=\"--awb-margin-bottom-small:8px;\"><h2 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:50;line-height:1.2;\">Tips 5: Taking into account demand transfers across products<\/h2><\/div><div class=\"fusion-text fusion-text-45\"><p>Don\u2019t forget that each SKU\u2019s sales will be impacted by its own promotions, but also by promotions on substitutable products; this phenomenon is known as cannibalization. 
In order to have a performant model, it is mandatory to anticipate the potential downlift on some products resulting from cannibalization.<\/p>\n<\/div><div class=\"fusion-text fusion-text-46\"><p><img decoding=\"async\" class=\"lazyload aligncenter wp-image-61074 size-full\" src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Cannibalization-illustration.png\" data-orig-src=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Cannibalization-illustration.png\" alt=\"\" width=\"603\" height=\"275\" srcset=\"data:image\/svg+xml,%3Csvg%20xmlns%3D%27http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%27%20width%3D%27603%27%20height%3D%27275%27%20viewBox%3D%270%200%20603%20275%27%3E%3Crect%20width%3D%27603%27%20height%3D%27275%27%20fill-opacity%3D%220%22%2F%3E%3C%2Fsvg%3E\" data-srcset=\"https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Cannibalization-illustration-200x91.png 200w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Cannibalization-illustration-300x137.png 300w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Cannibalization-illustration-400x182.png 400w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Cannibalization-illustration-600x274.png 600w, https:\/\/www.artefact.com\/\/wp-content\/uploads\/2021\/05\/Cannibalization-illustration.png 603w\" data-sizes=\"auto\" data-orig-sizes=\"(max-width: 603px) 100vw, 603px\" \/><\/p>\n<p style=\"text-align: center;\">Cannibalization illustration<\/p>\n<\/div><div class=\"fusion-text fusion-text-47\"><p>In order to be able to model the phenomenon, we first have to detect the cannibalization relationships between products. 
Two main approaches can be distinguished:<\/p>\n<\/div><ul style=\"--awb-line-height:27.2px;--awb-icon-width:27.2px;--awb-icon-height:27.2px;--awb-icon-margin:11.2px;--awb-content-margin:38.4px;\" class=\"fusion-checklist fusion-checklist-4 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Use an existing mapping: if such mapping exists, all we have to do is to add this information to our dataset (e.g. a column \u201cassociated cannibalizing product\u201d) and create specific features based on that.<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Detect the cannibalization relationships automatically: the idea will be to establish relationships by looking at product similarities and behaviors in the past.<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-title title fusion-title-12 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-bottom-small:8px;\"><h3 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:20;line-height:1.2;\">Automatic detection deep dive<\/h3><\/div><div class=\"fusion-text fusion-text-48\"><p>One possibility to automatically detect cannibalization relationships is to use correlation scores. The idea is to bring together products that are really likely to cannibalize each other, not on the basis of their category, but on the basis of correlations between the evolution of their historical sales. 
Correlation scores are calculated for each pair of products and if they are strongly negative then we can assume that these products are cannibalizing each other.<\/p>\n<\/div><div class=\"fusion-title title fusion-title-13 fusion-sep-none fusion-title-text fusion-title-size-three\" style=\"--awb-margin-bottom-small:8px;\"><h3 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:20;line-height:1.2;\">Cannibalization features deep dive<\/h3><\/div><div class=\"fusion-text fusion-text-49\"><p>From these cannibalization relationships we can create features following the same approach as for direct promotions. For example:<\/p>\n<\/div><ul style=\"--awb-line-height:27.2px;--awb-icon-width:27.2px;--awb-icon-height:27.2px;--awb-icon-margin:11.2px;--awb-content-margin:38.4px;\" class=\"fusion-checklist fusion-checklist-5 fusion-checklist-default type-icons\"><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Associate with each cannibalized product the promotion rolling mean computed for the cannibalizing product. The \u201cvolume\u201d in those features is therefore the \u201cvolume\u201d of the cannibalizing products.<\/p>\n<\/div><\/li><li class=\"fusion-li-item\" style=\"\"><span class=\"icon-wrapper circle-no\"><i class=\"fusion-li-icon awb-icon-check\" aria-hidden=\"true\"><\/i><\/span><div class=\"fusion-li-item-content\">\n<p>Associate with each cannibalized product the downlift (loss of volume) that generally appears when the associated cannibalizing product is sold. 
The \u201cvolume\u201d in those features is therefore the \u201cvolume\u201d of the cannibalized products.<\/p>\n<\/div><\/li><\/ul><div class=\"fusion-title title fusion-title-14 fusion-sep-none fusion-title-text fusion-title-size-two\" style=\"--awb-margin-bottom-small:8px;\"><h2 class=\"fusion-title-heading title-heading-left fusion-responsive-typography-calculated\" style=\"margin:0;--fontSize:50;line-height:1.2;\">Results and Conclusions<\/h2><\/div><div class=\"fusion-text fusion-text-50\"><p>In our projects, we have observed that, in most cases, rolling mean features tended to work better for the model than uplift\/downlift features. For example, for a given country, the rolling mean features resulted in a 2.8% increase in forecast accuracy while the uplift features resulted in a 2% increase.<\/p>\n<\/div><div class=\"fusion-text fusion-text-51\"><p>However, each project is different and our main learning is that the exploration phase is essential and serves as a basis for the creation of features later on. It is necessary to really understand how promotions work and their impact in order to model them correctly. 
This involves discussions with the business owners as well as Exploratory Data Analysis.<\/p>\n<\/div><\/div><\/div><\/div><\/article><div class=\"fusion-fullwidth fullwidth-box fusion-builder-row-5 fusion-flex-container nonhundred-percent-fullwidth non-hundred-percent-height-scrolling\" style=\"--awb-border-radius-top-left:0px;--awb-border-radius-top-right:0px;--awb-border-radius-bottom-right:0px;--awb-border-radius-bottom-left:0px;--awb-flex-wrap:wrap;\" ><div class=\"fusion-builder-row fusion-row fusion-flex-align-items-flex-start fusion-flex-content-wrap\" style=\"max-width:calc( 1440px + 20px );margin-left: calc(-20px \/ 2 );margin-right: calc(-20px \/ 2 );\"><div class=\"fusion-layout-column fusion_builder_column fusion-builder-column-4 fusion_builder_column_1_1 1_1 fusion-flex-column\" style=\"--awb-bg-size:cover;--awb-width-large:100%;--awb-margin-top-large:0px;--awb-spacing-right-large:10px;--awb-margin-bottom-large:0px;--awb-spacing-left-large:10px;--awb-width-medium:100%;--awb-order-medium:0;--awb-spacing-right-medium:10px;--awb-spacing-left-medium:10px;--awb-width-small:100%;--awb-order-small:0;--awb-spacing-right-small:10px;--awb-spacing-left-small:10px;\"><div class=\"fusion-column-wrapper fusion-column-has-shadow fusion-flex-justify-content-flex-start fusion-content-layout-column\"><div class=\"fusion-text fusion-text-52\"><p>Your Content Goes Here<\/p>\n<\/div><div ><a class=\"fusion-button button-flat fusion-button-default-size button-default fusion-button-default button-2 fusion-button-default-span fusion-button-default-type button-primary-medium\" target=\"_self\"><span class=\"fusion-button-text awb-button__text awb-button__text--default\">Button Text<\/span><\/a><\/div><\/div><\/div><\/div><\/div><\/p>\n","protected":false},"excerpt":{"rendered":"<p>27. 
Mai 2021<br \/>\nIn diesem folgenden Artikel einer gro\u00dfen Serie von Beitr\u00e4gen, die der Nachfrageprognose gewidmet sind, konzentrieren wir uns auf die Modellierung von Promotionen, einem wichtigen Faktor bei der Absatzprognose. Wir sehen uns an, wie ein typisches Promo dataset aussieht, wie die Funktionen gestaltet werden sollten, mit einem Schritt-f\u00fcr-Schritt-Beispiel in Python, und wie man mit komplexer Promotion-Granularit\u00e4t umgehen kann.<\/p>","protected":false},"featured_media":61085,"parent":0,"template":"","meta":{"_acf_changed":false,"ep_exclude_from_search":false},"blog-category":[22035],"blog-language":[2991],"class_list":["post-61071","blog","type-blog","status-publish","has-post-thumbnail","hentry","blog-category-data-ai-consulting","blog-language-en"],"acf":[],"_links":{"self":[{"href":"https:\/\/www.artefact.com\/de\/wp-json\/wp\/v2\/blog\/61071","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.artefact.com\/de\/wp-json\/wp\/v2\/blog"}],"about":[{"href":"https:\/\/www.artefact.com\/de\/wp-json\/wp\/v2\/types\/blog"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.artefact.com\/de\/wp-json\/wp\/v2\/media\/61085"}],"wp:attachment":[{"href":"https:\/\/www.artefact.com\/de\/wp-json\/wp\/v2\/media?parent=61071"}],"wp:term":[{"taxonomy":"blog-category","embeddable":true,"href":"https:\/\/www.artefact.com\/de\/wp-json\/wp\/v2\/blog-category?post=61071"},{"taxonomy":"blog-language","embeddable":true,"href":"https:\/\/www.artefact.com\/de\/wp-json\/wp\/v2\/blog-language?post=61071"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}