Skip to content
Snippets Groups Projects
Nettoyage_donnees_dataViz.ipynb 38.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • {
      "nbformat": 4,
      "nbformat_minor": 0,
      "metadata": {
        "colab": {
          "provenance": []
        },
        "kernelspec": {
          "name": "python3",
          "display_name": "Python 3"
        },
        "language_info": {
          "name": "python"
        }
      },
      "cells": [
        {
          "cell_type": "code",
    
          "metadata": {
            "id": "pz0Tz9u2ZDn_"
          },
          "outputs": [],
          "source": [
            "import pandas as pd\n",
            "from google.colab import files"
          ]
        },
    
        {
          "cell_type": "markdown",
          "source": [
            "#Exploration des données"
          ],
          "metadata": {
            "id": "7FDXPUMEkVIU"
          }
        },
    
        {
          "cell_type": "code",
          "source": [
            "# chargement des données\n",
    
            "data = pd.read_csv('apple_health_export_2024-12-29.csv')"
    
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "wkcZbH8vZlQ8",
    
            "outputId": "c69b2ac8-48d7-456a-a290-653489349cac"
    
          "outputs": [
            {
              "output_type": "stream",
              "name": "stderr",
              "text": [
    
                "<ipython-input-65-fc50c9a5452d>:2: DtypeWarning: Columns (0,1,3,4,5,6,7,8,9,11,12,15,17,18,19,23,25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
                "  data = pd.read_csv('apple_health_export_2024-12-29.csv')\n"
    
              ]
            }
          ]
        },
        {
          "cell_type": "code",
          "source": [
            "# exploration des données\n",
            "print(data.head())"
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "NiEm7PsgaErc",
    
            "outputId": "e5648f0a-7a7e-44d4-f17f-c55229c0fbd2"
    
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "                 type sourceName   value  unit                  startDate  \\\n",
                "0   BasalEnergyBurned     Iphone   0.611  kcal  2024-12-11 20:44:10 +0100   \n",
                "1  ActiveEnergyBurned     Iphone   0.055  kcal  2024-12-11 20:44:10 +0100   \n",
                "2   BasalEnergyBurned     Iphone  38.988  kcal  2024-12-11 20:00:46 +0100   \n",
                "3  ActiveEnergyBurned     Iphone   0.075  kcal  2024-12-11 19:51:44 +0100   \n",
                "4   BasalEnergyBurned     Iphone   8.111  kcal  2024-12-11 19:51:44 +0100   \n",
                "\n",
    
                "                     endDate               creationDate BiologicalSex  \\\n",
                "0  2024-12-11 20:44:51 +0100  2024-12-11 21:00:21 +0100           NaN   \n",
                "1  2024-12-11 20:44:51 +0100  2024-12-11 20:54:21 +0100           NaN   \n",
                "2  2024-12-11 20:44:10 +0100  2024-12-11 20:45:24 +0100           NaN   \n",
                "3  2024-12-11 20:00:46 +0100  2024-12-11 20:01:54 +0100           NaN   \n",
                "4  2024-12-11 20:00:46 +0100  2024-12-11 20:07:54 +0100           NaN   \n",
    
                "  dateComponents  key  ...  appleMoveTime DateOfBirth BloodType  \\\n",
                "0            NaN  NaN  ...            NaN         NaN       NaN   \n",
                "1            NaN  NaN  ...            NaN         NaN       NaN   \n",
                "2            NaN  NaN  ...            NaN         NaN       NaN   \n",
                "3            NaN  NaN  ...            NaN         NaN       NaN   \n",
                "4            NaN  NaN  ...            NaN         NaN       NaN   \n",
    
                "                                              device  appleStandHours  \\\n",
                "0  <<HKDevice: 0x282782df0>, name:iPhone, manufac...              NaN   \n",
                "1  <<HKDevice: 0x2827e3840>, name:iPhone, manufac...              NaN   \n",
                "2  <<HKDevice: 0x282782df0>, name:iPhone, manufac...              NaN   \n",
                "3  <<HKDevice: 0x2827e3840>, name:iPhone, manufac...              NaN   \n",
                "4  <<HKDevice: 0x282782df0>, name:iPhone, manufac...              NaN   \n",
    
                "  appleStandHoursGoal  activeEnergyBurnedGoal locale appleMoveTimeGoal  \\\n",
                "0                 NaN                     NaN    NaN               NaN   \n",
                "1                 NaN                     NaN    NaN               NaN   \n",
                "2                 NaN                     NaN    NaN               NaN   \n",
                "3                 NaN                     NaN    NaN               NaN   \n",
                "4                 NaN                     NaN    NaN               NaN   \n",
    
                "  FitzpatrickSkinType  \n",
                "0                 NaN  \n",
                "1                 NaN  \n",
                "2                 NaN  \n",
                "3                 NaN  \n",
                "4                 NaN  \n",
    
                "\n",
                "[5 rows x 26 columns]\n"
              ]
            }
          ]
        },
        {
          "cell_type": "code",
          "source": [
            "data.info()"
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "OsbTPfcraKtz",
    
            "outputId": "3087ded7-7042-4f79-ef7e-ea95822e0241"
    
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "<class 'pandas.core.frame.DataFrame'>\n",
                "RangeIndex: 274547 entries, 0 to 274546\n",
                "Data columns (total 26 columns):\n",
                " #   Column                       Non-Null Count   Dtype  \n",
                "---  ------                       --------------   -----  \n",
                " 0   type                         256923 non-null  object \n",
                " 1   sourceName                   256923 non-null  object \n",
                " 2   value                        273743 non-null  object \n",
                " 3   unit                         255055 non-null  object \n",
                " 4   startDate                    256923 non-null  object \n",
                " 5   endDate                      256923 non-null  object \n",
                " 6   creationDate                 256923 non-null  object \n",
    
                " 7   BiologicalSex                1 non-null       object \n",
                " 8   dateComponents               802 non-null     object \n",
                " 9   key                          16819 non-null   object \n",
                " 10  activeEnergyBurned           802 non-null     float64\n",
                " 11  sourceVersion                256923 non-null  object \n",
                " 12  CardioFitnessMedicationsUse  1 non-null       object \n",
                " 13  appleExerciseTimeGoal        802 non-null     float64\n",
                " 14  appleExerciseTime            802 non-null     float64\n",
                " 15  activeEnergyBurnedUnit       802 non-null     object \n",
                " 16  appleMoveTime                802 non-null     float64\n",
                " 17  DateOfBirth                  1 non-null       object \n",
                " 18  BloodType                    1 non-null       object \n",
                " 19  device                       255050 non-null  object \n",
                " 20  appleStandHours              802 non-null     float64\n",
                " 21  appleStandHoursGoal          802 non-null     float64\n",
                " 22  activeEnergyBurnedGoal       802 non-null     float64\n",
                " 23  locale                       1 non-null       object \n",
                " 24  appleMoveTimeGoal            802 non-null     float64\n",
                " 25  FitzpatrickSkinType          1 non-null       object \n",
    
                "dtypes: float64(8), object(18)\n",
                "memory usage: 54.5+ MB\n"
              ]
            }
          ]
        },
        {
          "cell_type": "code",
          "source": [
    
            "#nombre de lignes dans le dataframe\n",
            "num_rows = data.shape[0]\n",
            "print(\"Nombre de lignes dans le dataframe :\", num_rows)"
    
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "Q7fRVEWemNMS",
            "outputId": "4efbef3a-e9e4-4ba2-fb71-edbc894d384a"
    
          "execution_count": 68,
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "Nombre de lignes dans le dataframe : 274547\n"
              ]
            }
          ]
    
        },
        {
          "cell_type": "code",
          "source": [
    
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "u-ho2Dpxdz_C",
            "outputId": "c4853b2a-c82d-4f04-a2a4-fd6877789195"
    
          "execution_count": 69,
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "type                            17624\n",
                "sourceName                      17624\n",
                "value                             804\n",
                "unit                            19492\n",
                "startDate                       17624\n",
                "endDate                         17624\n",
                "creationDate                    17624\n",
                "BiologicalSex                  274546\n",
                "dateComponents                 273745\n",
                "key                            257728\n",
                "activeEnergyBurned             273745\n",
                "sourceVersion                   17624\n",
                "CardioFitnessMedicationsUse    274546\n",
                "appleExerciseTimeGoal          273745\n",
                "appleExerciseTime              273745\n",
                "activeEnergyBurnedUnit         273745\n",
                "appleMoveTime                  273745\n",
                "DateOfBirth                    274546\n",
                "BloodType                      274546\n",
                "device                          19497\n",
                "appleStandHours                273745\n",
                "appleStandHoursGoal            273745\n",
                "activeEnergyBurnedGoal         273745\n",
                "locale                         274546\n",
                "appleMoveTimeGoal              273745\n",
                "FitzpatrickSkinType            274546\n",
                "dtype: int64\n"
              ]
            }
          ]
    
        },
        {
          "cell_type": "code",
          "source": [
    
            "# Conversion des colonnes de date en datetime\n",
            "date_cols = ['startDate', 'endDate', 'creationDate']\n",
            "for col in date_cols:\n",
            "    data[col] = pd.to_datetime(data[col], errors='coerce')\n",
            "\n",
            "# Vérifier si la conversion a réussi\n",
            "print(data[date_cols].head())\n"
    
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "ElGeSoYZgsOs",
            "outputId": "20485e25-d63e-4005-a0c7-f942187650bf"
    
          "execution_count": 70,
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "                  startDate                   endDate  \\\n",
                "0 2024-12-11 20:44:10+01:00 2024-12-11 20:44:51+01:00   \n",
                "1 2024-12-11 20:44:10+01:00 2024-12-11 20:44:51+01:00   \n",
                "2 2024-12-11 20:00:46+01:00 2024-12-11 20:44:10+01:00   \n",
                "3 2024-12-11 19:51:44+01:00 2024-12-11 20:00:46+01:00   \n",
                "4 2024-12-11 19:51:44+01:00 2024-12-11 20:00:46+01:00   \n",
                "\n",
                "               creationDate  \n",
                "0 2024-12-11 21:00:21+01:00  \n",
                "1 2024-12-11 20:54:21+01:00  \n",
                "2 2024-12-11 20:45:24+01:00  \n",
                "3 2024-12-11 20:01:54+01:00  \n",
                "4 2024-12-11 20:07:54+01:00  \n"
              ]
            }
          ]
    
        },
        {
          "cell_type": "code",
          "source": [
    
            "print(data['locale'].unique())"
    
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "cxpxw8bujjtY",
            "outputId": "714b927b-7046-4461-d373-27deb202e49c"
    
          "execution_count": 71,
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "[nan 'fr_FR']\n"
              ]
            }
          ]
    
        },
        {
          "cell_type": "code",
          "source": [
    
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
    
            "id": "5qiUiyMMj5b7",
            "outputId": "959498fc-f129-4a25-caa1-23dd5c8707a4"
    
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
    
                "[nan 'HKMetadataKeyDevicePlacementSide' 'HKAlgorithmVersion' 'HKTimeZone']\n"
    
              ]
            }
          ]
        },
        {
          "cell_type": "code",
          "source": [
    
            "# Suppression des colonnes inutiles\n",
            "data = data.drop(['sourceName', 'sourceVersion','device', 'FitzpatrickSkinType','locale','key'], axis=1)"
          ],
          "metadata": {
            "id": "ni3MNv7MhTvH"
          },
          "execution_count": 74,
          "outputs": []
        },
        {
          "cell_type": "code",
          "source": [
            "data.head(3)"
    
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/",
    
            "id": "n7z-808fhxl5",
            "outputId": "34ceb97d-acf9-4348-b399-2711f21a5acb"
    
          "outputs": [
            {
              "output_type": "execute_result",
              "data": {
                "text/plain": [
    
                  "                 type   value  unit                 startDate  \\\n",
                  "0   BasalEnergyBurned   0.611  kcal 2024-12-11 20:44:10+01:00   \n",
                  "1  ActiveEnergyBurned   0.055  kcal 2024-12-11 20:44:10+01:00   \n",
                  "2   BasalEnergyBurned  38.988  kcal 2024-12-11 20:00:46+01:00   \n",
                  "\n",
                  "                    endDate              creationDate BiologicalSex  \\\n",
                  "0 2024-12-11 20:44:51+01:00 2024-12-11 21:00:21+01:00           NaN   \n",
                  "1 2024-12-11 20:44:51+01:00 2024-12-11 20:54:21+01:00           NaN   \n",
                  "2 2024-12-11 20:44:10+01:00 2024-12-11 20:45:24+01:00           NaN   \n",
                  "\n",
                  "  dateComponents  activeEnergyBurned CardioFitnessMedicationsUse  \\\n",
                  "0            NaN                 NaN                         NaN   \n",
                  "1            NaN                 NaN                         NaN   \n",
                  "2            NaN                 NaN                         NaN   \n",
                  "\n",
                  "   appleExerciseTimeGoal  appleExerciseTime activeEnergyBurnedUnit  \\\n",
                  "0                    NaN                NaN                    NaN   \n",
                  "1                    NaN                NaN                    NaN   \n",
                  "2                    NaN                NaN                    NaN   \n",
                  "\n",
                  "   appleMoveTime DateOfBirth BloodType  appleStandHours  appleStandHoursGoal  \\\n",
                  "0            NaN         NaN       NaN              NaN                  NaN   \n",
                  "1            NaN         NaN       NaN              NaN                  NaN   \n",
                  "2            NaN         NaN       NaN              NaN                  NaN   \n",
                  "\n",
                  "   activeEnergyBurnedGoal  appleMoveTimeGoal  \n",
                  "0                     NaN                NaN  \n",
                  "1                     NaN                NaN  \n",
                  "2                     NaN                NaN  "
    
                  "  <div id=\"df-f5c3598d-88dc-4db7-9453-f6060a9f560a\" class=\"colab-df-container\">\n",
    
                  "    <div>\n",
                  "<style scoped>\n",
                  "    .dataframe tbody tr th:only-of-type {\n",
                  "        vertical-align: middle;\n",
                  "    }\n",
                  "\n",
                  "    .dataframe tbody tr th {\n",
                  "        vertical-align: top;\n",
                  "    }\n",
                  "\n",
                  "    .dataframe thead th {\n",
                  "        text-align: right;\n",
                  "    }\n",
                  "</style>\n",
                  "<table border=\"1\" class=\"dataframe\">\n",
                  "  <thead>\n",
                  "    <tr style=\"text-align: right;\">\n",
                  "      <th></th>\n",
                  "      <th>type</th>\n",
                  "      <th>value</th>\n",
                  "      <th>unit</th>\n",
                  "      <th>startDate</th>\n",
                  "      <th>endDate</th>\n",
                  "      <th>creationDate</th>\n",
                  "      <th>BiologicalSex</th>\n",
    
                  "      <th>dateComponents</th>\n",
                  "      <th>activeEnergyBurned</th>\n",
    
                  "      <th>CardioFitnessMedicationsUse</th>\n",
    
                  "      <th>appleExerciseTimeGoal</th>\n",
                  "      <th>appleExerciseTime</th>\n",
    
                  "      <th>activeEnergyBurnedUnit</th>\n",
    
                  "      <th>appleMoveTime</th>\n",
                  "      <th>DateOfBirth</th>\n",
    
                  "      <th>BloodType</th>\n",
    
                  "      <th>appleStandHours</th>\n",
                  "      <th>appleStandHoursGoal</th>\n",
                  "      <th>activeEnergyBurnedGoal</th>\n",
                  "      <th>appleMoveTimeGoal</th>\n",
    
                  "    </tr>\n",
                  "  </thead>\n",
                  "  <tbody>\n",
                  "    <tr>\n",
                  "      <th>0</th>\n",
    
                  "      <td>BasalEnergyBurned</td>\n",
    
                  "      <td>0.611</td>\n",
                  "      <td>kcal</td>\n",
                  "      <td>2024-12-11 20:44:10+01:00</td>\n",
                  "      <td>2024-12-11 20:44:51+01:00</td>\n",
                  "      <td>2024-12-11 21:00:21+01:00</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
    
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
    
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "    </tr>\n",
                  "    <tr>\n",
                  "      <th>1</th>\n",
    
                  "      <td>ActiveEnergyBurned</td>\n",
    
                  "      <td>0.055</td>\n",
                  "      <td>kcal</td>\n",
                  "      <td>2024-12-11 20:44:10+01:00</td>\n",
                  "      <td>2024-12-11 20:44:51+01:00</td>\n",
                  "      <td>2024-12-11 20:54:21+01:00</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
    
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
    
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "    </tr>\n",
                  "    <tr>\n",
                  "      <th>2</th>\n",
    
                  "      <td>BasalEnergyBurned</td>\n",
    
                  "      <td>38.988</td>\n",
                  "      <td>kcal</td>\n",
                  "      <td>2024-12-11 20:00:46+01:00</td>\n",
                  "      <td>2024-12-11 20:44:10+01:00</td>\n",
                  "      <td>2024-12-11 20:45:24+01:00</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "      <td>NaN</td>\n",
                  "    </tr>\n",
                  "  </tbody>\n",
                  "</table>\n",
                  "</div>\n",
                  "    <div class=\"colab-df-buttons\">\n",
                  "\n",
                  "  <div class=\"colab-df-container\">\n",
    
                  "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f5c3598d-88dc-4db7-9453-f6060a9f560a')\"\n",
    
                  "            title=\"Convert this dataframe to an interactive table.\"\n",
                  "            style=\"display:none;\">\n",
                  "\n",
                  "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
                  "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
                  "  </svg>\n",
                  "    </button>\n",
                  "\n",
                  "  <style>\n",
                  "    .colab-df-container {\n",
                  "      display:flex;\n",
                  "      gap: 12px;\n",
                  "    }\n",
                  "\n",
                  "    .colab-df-convert {\n",
                  "      background-color: #E8F0FE;\n",
                  "      border: none;\n",
                  "      border-radius: 50%;\n",
                  "      cursor: pointer;\n",
                  "      display: none;\n",
                  "      fill: #1967D2;\n",
                  "      height: 32px;\n",
                  "      padding: 0 0 0 0;\n",
                  "      width: 32px;\n",
                  "    }\n",
                  "\n",
                  "    .colab-df-convert:hover {\n",
                  "      background-color: #E2EBFA;\n",
                  "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
                  "      fill: #174EA6;\n",
                  "    }\n",
                  "\n",
                  "    .colab-df-buttons div {\n",
                  "      margin-bottom: 4px;\n",
                  "    }\n",
                  "\n",
                  "    [theme=dark] .colab-df-convert {\n",
                  "      background-color: #3B4455;\n",
                  "      fill: #D2E3FC;\n",
                  "    }\n",
                  "\n",
                  "    [theme=dark] .colab-df-convert:hover {\n",
                  "      background-color: #434B5C;\n",
                  "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
                  "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
                  "      fill: #FFFFFF;\n",
                  "    }\n",
                  "  </style>\n",
                  "\n",
                  "    <script>\n",
                  "      const buttonEl =\n",
    
                  "        document.querySelector('#df-f5c3598d-88dc-4db7-9453-f6060a9f560a button.colab-df-convert');\n",
    
                  "      buttonEl.style.display =\n",
                  "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
                  "\n",
                  "      async function convertToInteractive(key) {\n",
    
                  "        const element = document.querySelector('#df-f5c3598d-88dc-4db7-9453-f6060a9f560a');\n",
    
                  "        const dataTable =\n",
                  "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
                  "                                                    [key], {});\n",
                  "        if (!dataTable) return;\n",
                  "\n",
                  "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
                  "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
                  "          + ' to learn more about interactive tables.';\n",
                  "        element.innerHTML = '';\n",
                  "        dataTable['output_type'] = 'display_data';\n",
                  "        await google.colab.output.renderOutput(dataTable, element);\n",
                  "        const docLink = document.createElement('div');\n",
                  "        docLink.innerHTML = docLinkHtml;\n",
                  "        element.appendChild(docLink);\n",
                  "      }\n",
                  "    </script>\n",
                  "  </div>\n",
                  "\n",
                  "\n",
    
                  "<div id=\"df-6bece9b8-2798-4604-a292-2493718d8948\">\n",
                  "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-6bece9b8-2798-4604-a292-2493718d8948')\"\n",
    
                  "            title=\"Suggest charts\"\n",
                  "            style=\"display:none;\">\n",
                  "\n",
                  "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
                  "     width=\"24px\">\n",
                  "    <g>\n",
                  "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
                  "    </g>\n",
                  "</svg>\n",
                  "  </button>\n",
                  "\n",
                  "<style>\n",
                  "  .colab-df-quickchart {\n",
                  "      --bg-color: #E8F0FE;\n",
                  "      --fill-color: #1967D2;\n",
                  "      --hover-bg-color: #E2EBFA;\n",
                  "      --hover-fill-color: #174EA6;\n",
                  "      --disabled-fill-color: #AAA;\n",
                  "      --disabled-bg-color: #DDD;\n",
                  "  }\n",
                  "\n",
                  "  [theme=dark] .colab-df-quickchart {\n",
                  "      --bg-color: #3B4455;\n",
                  "      --fill-color: #D2E3FC;\n",
                  "      --hover-bg-color: #434B5C;\n",
                  "      --hover-fill-color: #FFFFFF;\n",
                  "      --disabled-bg-color: #3B4455;\n",
                  "      --disabled-fill-color: #666;\n",
                  "  }\n",
                  "\n",
                  "  .colab-df-quickchart {\n",
                  "    background-color: var(--bg-color);\n",
                  "    border: none;\n",
                  "    border-radius: 50%;\n",
                  "    cursor: pointer;\n",
                  "    display: none;\n",
                  "    fill: var(--fill-color);\n",
                  "    height: 32px;\n",
                  "    padding: 0;\n",
                  "    width: 32px;\n",
                  "  }\n",
                  "\n",
                  "  .colab-df-quickchart:hover {\n",
                  "    background-color: var(--hover-bg-color);\n",
                  "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
                  "    fill: var(--button-hover-fill-color);\n",
                  "  }\n",
                  "\n",
                  "  .colab-df-quickchart-complete:disabled,\n",
                  "  .colab-df-quickchart-complete:disabled:hover {\n",
                  "    background-color: var(--disabled-bg-color);\n",
                  "    fill: var(--disabled-fill-color);\n",
                  "    box-shadow: none;\n",
                  "  }\n",
                  "\n",
                  "  .colab-df-spinner {\n",
                  "    border: 2px solid var(--fill-color);\n",
                  "    border-color: transparent;\n",
                  "    border-bottom-color: var(--fill-color);\n",
                  "    animation:\n",
                  "      spin 1s steps(1) infinite;\n",
                  "  }\n",
                  "\n",
                  "  @keyframes spin {\n",
                  "    0% {\n",
                  "      border-color: transparent;\n",
                  "      border-bottom-color: var(--fill-color);\n",
                  "      border-left-color: var(--fill-color);\n",
                  "    }\n",
                  "    20% {\n",
                  "      border-color: transparent;\n",
                  "      border-left-color: var(--fill-color);\n",
                  "      border-top-color: var(--fill-color);\n",
                  "    }\n",
                  "    30% {\n",
                  "      border-color: transparent;\n",
                  "      border-left-color: var(--fill-color);\n",
                  "      border-top-color: var(--fill-color);\n",
                  "      border-right-color: var(--fill-color);\n",
                  "    }\n",
                  "    40% {\n",
                  "      border-color: transparent;\n",
                  "      border-right-color: var(--fill-color);\n",
                  "      border-top-color: var(--fill-color);\n",
                  "    }\n",
                  "    60% {\n",
                  "      border-color: transparent;\n",
                  "      border-right-color: var(--fill-color);\n",
                  "    }\n",
                  "    80% {\n",
                  "      border-color: transparent;\n",
                  "      border-right-color: var(--fill-color);\n",
                  "      border-bottom-color: var(--fill-color);\n",
                  "    }\n",
                  "    90% {\n",
                  "      border-color: transparent;\n",
                  "      border-bottom-color: var(--fill-color);\n",
                  "    }\n",
                  "  }\n",
                  "</style>\n",
                  "\n",
                  "  <script>\n",
                  "    async function quickchart(key) {\n",
                  "      const quickchartButtonEl =\n",
                  "        document.querySelector('#' + key + ' button');\n",
                  "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
                  "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
                  "      try {\n",
                  "        const charts = await google.colab.kernel.invokeFunction(\n",
                  "            'suggestCharts', [key], {});\n",
                  "      } catch (error) {\n",
                  "        console.error('Error during call to suggestCharts:', error);\n",
                  "      }\n",
                  "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
                  "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
                  "    }\n",
                  "    (() => {\n",
                  "      let quickchartButtonEl =\n",
    
                  "        document.querySelector('#df-6bece9b8-2798-4604-a292-2493718d8948 button');\n",
    
                  "      quickchartButtonEl.style.display =\n",
                  "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
                  "    })();\n",
                  "  </script>\n",
                  "</div>\n",
                  "\n",
                  "    </div>\n",
                  "  </div>\n"
                ],
                "application/vnd.google.colaboratory.intrinsic+json": {
                  "type": "dataframe",
    
              "execution_count": 75
            }
          ]
        },
        {
          "cell_type": "code",
          "source": [
            "data.info()"
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "4Du3gk5vjvfC",
            "outputId": "f3a56e3c-7bb5-499f-a298-d14fa78736e7"
          },
          "execution_count": 76,
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "<class 'pandas.core.frame.DataFrame'>\n",
                "RangeIndex: 274547 entries, 0 to 274546\n",
                "Data columns (total 20 columns):\n",
                " #   Column                       Non-Null Count   Dtype                    \n",
                "---  ------                       --------------   -----                    \n",
                " 0   type                         256923 non-null  object                   \n",
                " 1   value                        273743 non-null  object                   \n",
                " 2   unit                         255055 non-null  object                   \n",
                " 3   startDate                    256923 non-null  datetime64[ns, UTC+01:00]\n",
                " 4   endDate                      256923 non-null  datetime64[ns, UTC+01:00]\n",
                " 5   creationDate                 256923 non-null  datetime64[ns, UTC+01:00]\n",
                " 6   BiologicalSex                1 non-null       object                   \n",
                " 7   dateComponents               802 non-null     object                   \n",
                " 8   activeEnergyBurned           802 non-null     float64                  \n",
                " 9   CardioFitnessMedicationsUse  1 non-null       object                   \n",
                " 10  appleExerciseTimeGoal        802 non-null     float64                  \n",
                " 11  appleExerciseTime            802 non-null     float64                  \n",
                " 12  activeEnergyBurnedUnit       802 non-null     object                   \n",
                " 13  appleMoveTime                802 non-null     float64                  \n",
                " 14  DateOfBirth                  1 non-null       object                   \n",
                " 15  BloodType                    1 non-null       object                   \n",
                " 16  appleStandHours              802 non-null     float64                  \n",
                " 17  appleStandHoursGoal          802 non-null     float64                  \n",
                " 18  activeEnergyBurnedGoal       802 non-null     float64                  \n",
                " 19  appleMoveTimeGoal            802 non-null     float64                  \n",
                "dtypes: datetime64[ns, UTC+01:00](3), float64(8), object(9)\n",
                "memory usage: 41.9+ MB\n"
              ]
            }
          ]
        },
        {
          "cell_type": "markdown",
          "source": [
            "## Gestion des NaN"
          ],
          "metadata": {
            "id": "5A8c6F6EkSul"
          }
        },
        {
          "cell_type": "code",
          "source": [
            "# Afficher les valeurs uniques pour la colonne 'unit'\n",
            "unique_unit = data['unit'].unique()\n",
            "print(\"Valeurs uniques de 'unit':\")\n",
            "print(unique_unit)\n",
            "\n",
            "# Afficher les valeurs uniques pour la colonne 'type'\n",
            "unique_type = data['type'].unique()\n",
            "print(\"\\nValeurs uniques de 'type':\")\n",
            "print(unique_type)\n"
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "f_prPndUkw9V",
            "outputId": "f1c4c10d-d534-4513-a0d1-81bdae826360"
          },
          "execution_count": 77,
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "Valeurs uniques de 'unit':\n",
                "['kcal' 'count' 'km' 'km/hr' '%' 'cm' nan 'dBASPL' 'kg' 'hr']\n",
                "\n",
                "Valeurs uniques de 'type':\n",
                "['BasalEnergyBurned' 'ActiveEnergyBurned' 'StepCount'\n",
                " 'DistanceWalkingRunning' 'WalkingSpeed' 'WalkingDoubleSupportPercentage'\n",
                " 'WalkingStepLength' 'WalkingAsymmetryPercentage' 'FlightsClimbed'\n",
                " 'SleepAnalysis' 'AppleWalkingSteadiness' 'HeadphoneAudioExposure'\n",
                " 'BodyMass' 'Height' 'HKDataTypeSleepDurationGoal' nan]\n"
              ]
            }
          ]
        },
        {
          "cell_type": "code",
          "source": [
            "# Remplacer les NaN dans 'unit' en fonction du 'type'\n",
            "data['unit'] = data.apply(lambda row: 'kcal' if pd.isna(row['unit']) and row['type'] in ['BasalEnergyBurned', 'ActiveEnergyBurned'] else\n",
            "                                  'count' if pd.isna(row['unit']) and row['type'] == 'StepCount' else\n",
            "                                  'km' if pd.isna(row['unit']) and row['type'] == 'DistanceWalkingRunning' else\n",
            "                                  'km/hr' if pd.isna(row['unit']) and row['type'] == 'WalkingSpeed' else\n",
            "                                  'inconnu', axis=1)\n",
            "\n",
            "# Convertir la colonne 'value' en numérique (les erreurs seront converties en NaN)\n",
            "data['value'] = pd.to_numeric(data['value'], errors='coerce')\n",
            "\n",
            "# Remplacer les NaN dans par la moyenne\n",
            "data['value'] = data['value'].apply(lambda x: data['value'].mean() if pd.isna(x) else x)\n",
            "\n",
            "# Vérification\n",
            "print(data['value'].head())\n",
            "\n"
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "Opa2aXr_k-iz",
            "outputId": "78b6d888-3896-4d08-bf79-757072478303"
          },
          "execution_count": 78,
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "0     0.611\n",
                "1     0.055\n",
                "2    38.988\n",
                "3     0.075\n",
                "4     8.111\n",
                "Name: value, dtype: float64\n"
              ]
            }
          ]
        },
        {
          "cell_type": "code",
          "source": [
            "# Colonnes numériques : remplir avec la moyenne\n",
            "num_cols = data.select_dtypes(include='number').columns\n",
            "data[num_cols] = data[num_cols].fillna(data[num_cols].mean())\n",
            "\n",
            "# Vérifier le résultat\n",
            "print(data.isna().sum())\n"
          ],
          "metadata": {
            "colab": {
              "base_uri": "https://localhost:8080/"
            },
            "id": "4D5dZbVxit-U",
            "outputId": "a7083c1a-6d34-480f-b1c8-ad2dc0e14c8c"
          },
          "execution_count": 80,
          "outputs": [
            {
              "output_type": "stream",
              "name": "stdout",
              "text": [
                "type                            17624\n",
                "value                               0\n",
                "unit                                0\n",
                "startDate                       17624\n",
                "endDate                         17624\n",
                "creationDate                    17624\n",
                "BiologicalSex                       0\n",
                "dateComponents                 273745\n",
                "activeEnergyBurned                  0\n",
                "CardioFitnessMedicationsUse    274546\n",
                "appleExerciseTimeGoal               0\n",
                "appleExerciseTime                   0\n",
                "activeEnergyBurnedUnit         273745\n",
                "appleMoveTime                       0\n",
                "DateOfBirth                    274546\n",
                "BloodType                           0\n",
                "appleStandHours                     0\n",
                "appleStandHoursGoal                 0\n",
                "activeEnergyBurnedGoal              0\n",
                "appleMoveTimeGoal                   0\n",
                "dtype: int64\n"
              ]