Main Code

Here’s the Python program for a retail sales analysis and forecasting project, focusing on data collection, cleaning, analysis, and forecasting without visualization. There will be a detailed explanation in the next lesson.

import pandas as pd
import numpy as np
import requests
import json
from sqlalchemy import create_engine
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet
from sklearn.metrics import mean_absolute_error

# Step 1: Data Collection

# Example function to collect data from an online sales platform API
def collect_online_sales_data(api_url, headers):
    response = requests.get(api_url, headers=headers)
    data = response.json()
    return pd.DataFrame(data)

# Example function to extract data from in-store POS system
def collect_pos_data(db_connection_string, query):
    engine = create_engine(db_connection_string)
    data = pd.read_sql_query(query, engine)
    return data

# Example function to read third-party logistics data from CSV
def collect_logistics_data(csv_file_path):
    return pd.read_csv(csv_file_path)

# Step 2: Data Cleaning

def clean_data(df):
    # Handling missing values
    df = df.dropna(subset=['sales', 'date'])
    df['date'] = pd.to_datetime(df['date'])
    df = df.fillna(0)
    return df

# Step 3: Data Analysis

def analyze_sales(df):
    # Group by date to get daily sales
    daily_sales = df.groupby('date').agg({'sales': 'sum'}).reset_index()
    return daily_sales

def get_top_selling_products(df, top_n=10):
    top_products = df.groupby('product_id').agg({'sales': 'sum'}).sort_values(by='sales', ascending=False).head(top_n)
    return top_products

# Step 4: Sales Forecasting

def sarima_forecasting(df, order, seasonal_order):
    model = SARIMAX(df['sales'], order=order, seasonal_order=seasonal_order)
    model_fit = model.fit(disp=False)
    forecast = model_fit.forecast(steps=30)
    return forecast

def prophet_forecasting(df):
    df = df.rename(columns={'date': 'ds', 'sales': 'y'})
    model = Prophet()
    model.fit(df)
    future = model.make_future_dataframe(periods=30)
    forecast = model.predict(future)
    return forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

# Step 5: Inventory and Marketing Recommendations

def recommend_inventory_levels(forecast):
    inventory_levels = forecast[['ds', 'yhat']].copy()
    inventory_levels['recommended_inventory'] = inventory_levels['yhat'] * 1.2  # 20% buffer
    return inventory_levels

def recommend_marketing_strategies(df):
    # Example: Identify products with low sales and high inventory
    low_sales_products = df[df['sales'] < df['sales'].quantile(0.25)]
    high_inventory_products = df[df['inventory'] > df['inventory'].quantile(0.75)]
    target_products = pd.merge(low_sales_products, high_inventory_products, on='product_id')
    return target_products

# Main execution flow
if __name__ == "__main__":
    # Collect data
    online_sales_df = collect_online_sales_data('https://api.example.com/sales', headers={'Authorization': 'Bearer YOUR_API_KEY'})
    pos_df = collect_pos_data('postgresql://username:password@localhost/dbname', 'SELECT * FROM sales')
    logistics_df = collect_logistics_data('logistics_data.csv')

    # Combine data
    sales_df = pd.concat([online_sales_df, pos_df, logistics_df], ignore_index=True)

    # Clean data
    sales_df = clean_data(sales_df)

    # Analyze data
    daily_sales = analyze_sales(sales_df)
    top_products = get_top_selling_products(sales_df)

    # Forecast sales
    sarima_forecast = sarima_forecasting(daily_sales, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
    prophet_forecast = prophet_forecasting(daily_sales)

    # Recommendations
    inventory_recommendations = recommend_inventory_levels(prophet_forecast)
    marketing_recommendations = recommend_marketing_strategies(sales_df)

    # Output results
    print("Top Selling Products:\n", top_products)
    print("SARIMA Forecast:\n", sarima_forecast)
    print("Prophet Forecast:\n", prophet_forecast)
    print("Inventory Recommendations:\n", inventory_recommendations)
    print("Marketing Recommendations:\n", marketing_recommendations)

Short Explanation

  1. Data Collection:
    • Functions are provided to collect data from an API, a database, and a CSV file.
  2. Data Cleaning:
    • The clean_data function handles missing values and ensures date formatting.
  3. Data Analysis:
    • analyze_sales groups sales data by date to get daily sales.
    • get_top_selling_products identifies the top-selling products.
  4. Sales Forecasting:
    • sarima_forecasting uses the SARIMA model to forecast sales.
    • prophet_forecasting uses the Prophet model to forecast sales.
  5. Inventory and Marketing Recommendations:
    • recommend_inventory_levels provides inventory recommendations based on forecasted sales.
    • recommend_marketing_strategies identifies products for targeted marketing based on sales and inventory levels.