Here’s the Python program for a retail sales analysis and forecasting project, focusing on data collection, cleaning, analysis, and forecasting without visualization. There will be a detailed explanation in the next lesson.
import pandas as pd
import numpy as np
import requests
import json
from sqlalchemy import create_engine
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet
from sklearn.metrics import mean_absolute_error
# Step 1: Data Collection
# Example function to collect data from an online sales platform API
def collect_online_sales_data(api_url, headers):
response = requests.get(api_url, headers=headers)
data = response.json()
return pd.DataFrame(data)
# Example function to extract data from in-store POS system
def collect_pos_data(db_connection_string, query):
engine = create_engine(db_connection_string)
data = pd.read_sql_query(query, engine)
return data
# Example function to read third-party logistics data from CSV
def collect_logistics_data(csv_file_path):
return pd.read_csv(csv_file_path)
# Step 2: Data Cleaning
def clean_data(df):
# Handling missing values
df = df.dropna(subset=['sales', 'date'])
df['date'] = pd.to_datetime(df['date'])
df = df.fillna(0)
return df
# Step 3: Data Analysis
def analyze_sales(df):
# Group by date to get daily sales
daily_sales = df.groupby('date').agg({'sales': 'sum'}).reset_index()
return daily_sales
def get_top_selling_products(df, top_n=10):
top_products = df.groupby('product_id').agg({'sales': 'sum'}).sort_values(by='sales', ascending=False).head(top_n)
return top_products
# Step 4: Sales Forecasting
def sarima_forecasting(df, order, seasonal_order):
model = SARIMAX(df['sales'], order=order, seasonal_order=seasonal_order)
model_fit = model.fit(disp=False)
forecast = model_fit.forecast(steps=30)
return forecast
def prophet_forecasting(df):
df = df.rename(columns={'date': 'ds', 'sales': 'y'})
model = Prophet()
model.fit(df)
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)
return forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
# Step 5: Inventory and Marketing Recommendations
def recommend_inventory_levels(forecast):
inventory_levels = forecast[['ds', 'yhat']].copy()
inventory_levels['recommended_inventory'] = inventory_levels['yhat'] * 1.2 # 20% buffer
return inventory_levels
def recommend_marketing_strategies(df):
# Example: Identify products with low sales and high inventory
low_sales_products = df[df['sales'] < df['sales'].quantile(0.25)]
high_inventory_products = df[df['inventory'] > df['inventory'].quantile(0.75)]
target_products = pd.merge(low_sales_products, high_inventory_products, on='product_id')
return target_products
# Main execution flow
if __name__ == "__main__":
# Collect data
online_sales_df = collect_online_sales_data('https://api.example.com/sales', headers={'Authorization': 'Bearer YOUR_API_KEY'})
pos_df = collect_pos_data('postgresql://username:password@localhost/dbname', 'SELECT * FROM sales')
logistics_df = collect_logistics_data('logistics_data.csv')
# Combine data
sales_df = pd.concat([online_sales_df, pos_df, logistics_df], ignore_index=True)
# Clean data
sales_df = clean_data(sales_df)
# Analyze data
daily_sales = analyze_sales(sales_df)
top_products = get_top_selling_products(sales_df)
# Forecast sales
sarima_forecast = sarima_forecasting(daily_sales, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
prophet_forecast = prophet_forecasting(daily_sales)
# Recommendations
inventory_recommendations = recommend_inventory_levels(prophet_forecast)
marketing_recommendations = recommend_marketing_strategies(sales_df)
# Output results
print("Top Selling Products:\n", top_products)
print("SARIMA Forecast:\n", sarima_forecast)
print("Prophet Forecast:\n", prophet_forecast)
print("Inventory Recommendations:\n", inventory_recommendations)
print("Marketing Recommendations:\n", marketing_recommendations)
clean_data function handles missing values and ensures date formatting.analyze_sales groups sales data by date to get daily sales.get_top_selling_products identifies the top-selling products.sarima_forecasting uses the SARIMA model to forecast sales.prophet_forecasting uses the Prophet model to forecast sales.recommend_inventory_levels provides inventory recommendations based on forecasted sales.recommend_marketing_strategies identifies products for targeted marketing based on sales and inventory levels.