Newer
Older
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"id": "pz0Tz9u2ZDn_"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"from google.colab import files"
]
},
{
"cell_type": "markdown",
"source": [
"#Exploration des données"
],
"metadata": {
"id": "7FDXPUMEkVIU"
}
},
{
"cell_type": "code",
"source": [
"# chargement des données\n",
"data = pd.read_csv('apple_health_export_2024-12-29.csv')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wkcZbH8vZlQ8",
"outputId": "c69b2ac8-48d7-456a-a290-653489349cac"
"execution_count": 65,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"<ipython-input-65-fc50c9a5452d>:2: DtypeWarning: Columns (0,1,3,4,5,6,7,8,9,11,12,15,17,18,19,23,25) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" data = pd.read_csv('apple_health_export_2024-12-29.csv')\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# exploration des données\n",
"print(data.head())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NiEm7PsgaErc",
"outputId": "e5648f0a-7a7e-44d4-f17f-c55229c0fbd2"
"execution_count": 66,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" type sourceName value unit startDate \\\n",
"0 BasalEnergyBurned Iphone 0.611 kcal 2024-12-11 20:44:10 +0100 \n",
"1 ActiveEnergyBurned Iphone 0.055 kcal 2024-12-11 20:44:10 +0100 \n",
"2 BasalEnergyBurned Iphone 38.988 kcal 2024-12-11 20:00:46 +0100 \n",
"3 ActiveEnergyBurned Iphone 0.075 kcal 2024-12-11 19:51:44 +0100 \n",
"4 BasalEnergyBurned Iphone 8.111 kcal 2024-12-11 19:51:44 +0100 \n",
"\n",
" endDate creationDate BiologicalSex \\\n",
"0 2024-12-11 20:44:51 +0100 2024-12-11 21:00:21 +0100 NaN \n",
"1 2024-12-11 20:44:51 +0100 2024-12-11 20:54:21 +0100 NaN \n",
"2 2024-12-11 20:44:10 +0100 2024-12-11 20:45:24 +0100 NaN \n",
"3 2024-12-11 20:00:46 +0100 2024-12-11 20:01:54 +0100 NaN \n",
"4 2024-12-11 20:00:46 +0100 2024-12-11 20:07:54 +0100 NaN \n",
" dateComponents key ... appleMoveTime DateOfBirth BloodType \\\n",
"0 NaN NaN ... NaN NaN NaN \n",
"1 NaN NaN ... NaN NaN NaN \n",
"2 NaN NaN ... NaN NaN NaN \n",
"3 NaN NaN ... NaN NaN NaN \n",
"4 NaN NaN ... NaN NaN NaN \n",
" device appleStandHours \\\n",
"0 <<HKDevice: 0x282782df0>, name:iPhone, manufac... NaN \n",
"1 <<HKDevice: 0x2827e3840>, name:iPhone, manufac... NaN \n",
"2 <<HKDevice: 0x282782df0>, name:iPhone, manufac... NaN \n",
"3 <<HKDevice: 0x2827e3840>, name:iPhone, manufac... NaN \n",
"4 <<HKDevice: 0x282782df0>, name:iPhone, manufac... NaN \n",
" appleStandHoursGoal activeEnergyBurnedGoal locale appleMoveTimeGoal \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
" FitzpatrickSkinType \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
"[5 rows x 26 columns]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"data.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OsbTPfcraKtz",
"outputId": "3087ded7-7042-4f79-ef7e-ea95822e0241"
"execution_count": 67,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 274547 entries, 0 to 274546\n",
"Data columns (total 26 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 type 256923 non-null object \n",
" 1 sourceName 256923 non-null object \n",
" 2 value 273743 non-null object \n",
" 3 unit 255055 non-null object \n",
" 4 startDate 256923 non-null object \n",
" 5 endDate 256923 non-null object \n",
" 6 creationDate 256923 non-null object \n",
" 7 BiologicalSex 1 non-null object \n",
" 8 dateComponents 802 non-null object \n",
" 9 key 16819 non-null object \n",
" 10 activeEnergyBurned 802 non-null float64\n",
" 11 sourceVersion 256923 non-null object \n",
" 12 CardioFitnessMedicationsUse 1 non-null object \n",
" 13 appleExerciseTimeGoal 802 non-null float64\n",
" 14 appleExerciseTime 802 non-null float64\n",
" 15 activeEnergyBurnedUnit 802 non-null object \n",
" 16 appleMoveTime 802 non-null float64\n",
" 17 DateOfBirth 1 non-null object \n",
" 18 BloodType 1 non-null object \n",
" 19 device 255050 non-null object \n",
" 20 appleStandHours 802 non-null float64\n",
" 21 appleStandHoursGoal 802 non-null float64\n",
" 22 activeEnergyBurnedGoal 802 non-null float64\n",
" 23 locale 1 non-null object \n",
" 24 appleMoveTimeGoal 802 non-null float64\n",
" 25 FitzpatrickSkinType 1 non-null object \n",
"dtypes: float64(8), object(18)\n",
"memory usage: 54.5+ MB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"#nombre de lignes dans le dataframe\n",
"num_rows = data.shape[0]\n",
"print(\"Nombre de lignes dans le dataframe :\", num_rows)"
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Q7fRVEWemNMS",
"outputId": "4efbef3a-e9e4-4ba2-fb71-edbc894d384a"
"execution_count": 68,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Nombre de lignes dans le dataframe : 274547\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data.isna().sum())"
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u-ho2Dpxdz_C",
"outputId": "c4853b2a-c82d-4f04-a2a4-fd6877789195"
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
"execution_count": 69,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"type 17624\n",
"sourceName 17624\n",
"value 804\n",
"unit 19492\n",
"startDate 17624\n",
"endDate 17624\n",
"creationDate 17624\n",
"BiologicalSex 274546\n",
"dateComponents 273745\n",
"key 257728\n",
"activeEnergyBurned 273745\n",
"sourceVersion 17624\n",
"CardioFitnessMedicationsUse 274546\n",
"appleExerciseTimeGoal 273745\n",
"appleExerciseTime 273745\n",
"activeEnergyBurnedUnit 273745\n",
"appleMoveTime 273745\n",
"DateOfBirth 274546\n",
"BloodType 274546\n",
"device 19497\n",
"appleStandHours 273745\n",
"appleStandHoursGoal 273745\n",
"activeEnergyBurnedGoal 273745\n",
"locale 274546\n",
"appleMoveTimeGoal 273745\n",
"FitzpatrickSkinType 274546\n",
"dtype: int64\n"
]
}
]
"cell_type": "markdown",
"#Nettoyage des données :"
"id": "0RJVI87HkYdw"
}
},
{
"cell_type": "code",
"source": [
"# Conversion des colonnes de date en datetime\n",
"date_cols = ['startDate', 'endDate', 'creationDate']\n",
"for col in date_cols:\n",
" data[col] = pd.to_datetime(data[col], errors='coerce')\n",
"\n",
"# Vérifier si la conversion a réussi\n",
"print(data[date_cols].head())\n"
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ElGeSoYZgsOs",
"outputId": "20485e25-d63e-4005-a0c7-f942187650bf"
"execution_count": 70,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" startDate endDate \\\n",
"0 2024-12-11 20:44:10+01:00 2024-12-11 20:44:51+01:00 \n",
"1 2024-12-11 20:44:10+01:00 2024-12-11 20:44:51+01:00 \n",
"2 2024-12-11 20:00:46+01:00 2024-12-11 20:44:10+01:00 \n",
"3 2024-12-11 19:51:44+01:00 2024-12-11 20:00:46+01:00 \n",
"4 2024-12-11 19:51:44+01:00 2024-12-11 20:00:46+01:00 \n",
"\n",
" creationDate \n",
"0 2024-12-11 21:00:21+01:00 \n",
"1 2024-12-11 20:54:21+01:00 \n",
"2 2024-12-11 20:45:24+01:00 \n",
"3 2024-12-11 20:01:54+01:00 \n",
"4 2024-12-11 20:07:54+01:00 \n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data['locale'].unique())"
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cxpxw8bujjtY",
"outputId": "714b927b-7046-4461-d373-27deb202e49c"
"execution_count": 71,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[nan 'fr_FR']\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(data['key'].unique())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5qiUiyMMj5b7",
"outputId": "959498fc-f129-4a25-caa1-23dd5c8707a4"
"execution_count": 73,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[nan 'HKMetadataKeyDevicePlacementSide' 'HKAlgorithmVersion' 'HKTimeZone']\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Suppression des colonnes inutiles\n",
"data = data.drop(['sourceName', 'sourceVersion','device', 'FitzpatrickSkinType','locale','key'], axis=1)"
],
"metadata": {
"id": "ni3MNv7MhTvH"
},
"execution_count": 74,
"outputs": []
},
{
"cell_type": "code",
"source": [
"data.head(3)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 216
"id": "n7z-808fhxl5",
"outputId": "34ceb97d-acf9-4348-b399-2711f21a5acb"
"execution_count": 75,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
" type value unit startDate \\\n",
"0 BasalEnergyBurned 0.611 kcal 2024-12-11 20:44:10+01:00 \n",
"1 ActiveEnergyBurned 0.055 kcal 2024-12-11 20:44:10+01:00 \n",
"2 BasalEnergyBurned 38.988 kcal 2024-12-11 20:00:46+01:00 \n",
"\n",
" endDate creationDate BiologicalSex \\\n",
"0 2024-12-11 20:44:51+01:00 2024-12-11 21:00:21+01:00 NaN \n",
"1 2024-12-11 20:44:51+01:00 2024-12-11 20:54:21+01:00 NaN \n",
"2 2024-12-11 20:44:10+01:00 2024-12-11 20:45:24+01:00 NaN \n",
"\n",
" dateComponents activeEnergyBurned CardioFitnessMedicationsUse \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"\n",
" appleExerciseTimeGoal appleExerciseTime activeEnergyBurnedUnit \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"\n",
" appleMoveTime DateOfBirth BloodType appleStandHours appleStandHoursGoal \\\n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"\n",
" activeEnergyBurnedGoal appleMoveTimeGoal \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN "
],
"text/html": [
"\n",
" <div id=\"df-f5c3598d-88dc-4db7-9453-f6060a9f560a\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>type</th>\n",
" <th>value</th>\n",
" <th>unit</th>\n",
" <th>startDate</th>\n",
" <th>endDate</th>\n",
" <th>creationDate</th>\n",
" <th>BiologicalSex</th>\n",
" <th>dateComponents</th>\n",
" <th>activeEnergyBurned</th>\n",
" <th>CardioFitnessMedicationsUse</th>\n",
" <th>appleExerciseTimeGoal</th>\n",
" <th>appleExerciseTime</th>\n",
" <th>activeEnergyBurnedUnit</th>\n",
" <th>appleMoveTime</th>\n",
" <th>DateOfBirth</th>\n",
" <th>appleStandHours</th>\n",
" <th>appleStandHoursGoal</th>\n",
" <th>activeEnergyBurnedGoal</th>\n",
" <th>appleMoveTimeGoal</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>BasalEnergyBurned</td>\n",
" <td>0.611</td>\n",
" <td>kcal</td>\n",
" <td>2024-12-11 20:44:10+01:00</td>\n",
" <td>2024-12-11 20:44:51+01:00</td>\n",
" <td>2024-12-11 21:00:21+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ActiveEnergyBurned</td>\n",
" <td>0.055</td>\n",
" <td>kcal</td>\n",
" <td>2024-12-11 20:44:10+01:00</td>\n",
" <td>2024-12-11 20:44:51+01:00</td>\n",
" <td>2024-12-11 20:54:21+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>BasalEnergyBurned</td>\n",
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
" <td>38.988</td>\n",
" <td>kcal</td>\n",
" <td>2024-12-11 20:00:46+01:00</td>\n",
" <td>2024-12-11 20:44:10+01:00</td>\n",
" <td>2024-12-11 20:45:24+01:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f5c3598d-88dc-4db7-9453-f6060a9f560a')\"\n",
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-f5c3598d-88dc-4db7-9453-f6060a9f560a button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-f5c3598d-88dc-4db7-9453-f6060a9f560a');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-6bece9b8-2798-4604-a292-2493718d8948\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-6bece9b8-2798-4604-a292-2493718d8948')\"\n",
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-6bece9b8-2798-4604-a292-2493718d8948 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "data"
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
"execution_count": 75
}
]
},
{
"cell_type": "code",
"source": [
"data.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4Du3gk5vjvfC",
"outputId": "f3a56e3c-7bb5-499f-a298-d14fa78736e7"
},
"execution_count": 76,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 274547 entries, 0 to 274546\n",
"Data columns (total 20 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 type 256923 non-null object \n",
" 1 value 273743 non-null object \n",
" 2 unit 255055 non-null object \n",
" 3 startDate 256923 non-null datetime64[ns, UTC+01:00]\n",
" 4 endDate 256923 non-null datetime64[ns, UTC+01:00]\n",
" 5 creationDate 256923 non-null datetime64[ns, UTC+01:00]\n",
" 6 BiologicalSex 1 non-null object \n",
" 7 dateComponents 802 non-null object \n",
" 8 activeEnergyBurned 802 non-null float64 \n",
" 9 CardioFitnessMedicationsUse 1 non-null object \n",
" 10 appleExerciseTimeGoal 802 non-null float64 \n",
" 11 appleExerciseTime 802 non-null float64 \n",
" 12 activeEnergyBurnedUnit 802 non-null object \n",
" 13 appleMoveTime 802 non-null float64 \n",
" 14 DateOfBirth 1 non-null object \n",
" 15 BloodType 1 non-null object \n",
" 16 appleStandHours 802 non-null float64 \n",
" 17 appleStandHoursGoal 802 non-null float64 \n",
" 18 activeEnergyBurnedGoal 802 non-null float64 \n",
" 19 appleMoveTimeGoal 802 non-null float64 \n",
"dtypes: datetime64[ns, UTC+01:00](3), float64(8), object(9)\n",
"memory usage: 41.9+ MB\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Gestion des Nan"
],
"metadata": {
"id": "5A8c6F6EkSul"
}
},
{
"cell_type": "code",
"source": [
"# Afficher les valeurs uniques pour la colonne 'unit'\n",
"unique_unit = data['unit'].unique()\n",
"print(\"Valeurs uniques de 'unit':\")\n",
"print(unique_unit)\n",
"\n",
"# Afficher les valeurs uniques pour la colonne 'type'\n",
"unique_type = data['type'].unique()\n",
"print(\"\\nValeurs uniques de 'type':\")\n",
"print(unique_type)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f_prPndUkw9V",
"outputId": "f1c4c10d-d534-4513-a0d1-81bdae826360"
},
"execution_count": 77,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Valeurs uniques de 'unit':\n",
"['kcal' 'count' 'km' 'km/hr' '%' 'cm' nan 'dBASPL' 'kg' 'hr']\n",
"\n",
"Valeurs uniques de 'type':\n",
"['BasalEnergyBurned' 'ActiveEnergyBurned' 'StepCount'\n",
" 'DistanceWalkingRunning' 'WalkingSpeed' 'WalkingDoubleSupportPercentage'\n",
" 'WalkingStepLength' 'WalkingAsymmetryPercentage' 'FlightsClimbed'\n",
" 'SleepAnalysis' 'AppleWalkingSteadiness' 'HeadphoneAudioExposure'\n",
" 'BodyMass' 'Height' 'HKDataTypeSleepDurationGoal' nan]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Remplacer les NaN dans 'unit' en fonction du 'type'\n",
"data['unit'] = data.apply(lambda row: 'kcal' if pd.isna(row['unit']) and row['type'] in ['BasalEnergyBurned', 'ActiveEnergyBurned'] else\n",
" 'count' if pd.isna(row['unit']) and row['type'] == 'StepCount' else\n",
" 'km' if pd.isna(row['unit']) and row['type'] == 'DistanceWalkingRunning' else\n",
" 'km/hr' if pd.isna(row['unit']) and row['type'] == 'WalkingSpeed' else\n",
" 'inconnu', axis=1)\n",
"\n",
"# Convertir la colonne 'value' en numérique (les erreurs seront converties en NaN)\n",
"data['value'] = pd.to_numeric(data['value'], errors='coerce')\n",
"\n",
"# Remplacer les NaN dans par la moyenne\n",
"data['value'] = data['value'].apply(lambda x: data['value'].mean() if pd.isna(x) else x)\n",
"\n",
"# Vérification\n",
"print(data['value'].head())\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Opa2aXr_k-iz",
"outputId": "78b6d888-3896-4d08-bf79-757072478303"
},
"execution_count": 78,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0 0.611\n",
"1 0.055\n",
"2 38.988\n",
"3 0.075\n",
"4 8.111\n",
"Name: value, dtype: float64\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Colonnes numériques : remplir avec la moyenne\n",
"num_cols = data.select_dtypes(include='number').columns\n",
"data[num_cols] = data[num_cols].fillna(data[num_cols].mean())\n",
"\n",
"# Vérifier le résultat\n",
"print(data.isna().sum())\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "4D5dZbVxit-U",
"outputId": "a7083c1a-6d34-480f-b1c8-ad2dc0e14c8c"
},
"execution_count": 80,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"type 17624\n",
"value 0\n",
"unit 0\n",
"startDate 17624\n",
"endDate 17624\n",
"creationDate 17624\n",
"BiologicalSex 0\n",
"dateComponents 273745\n",
"activeEnergyBurned 0\n",
"CardioFitnessMedicationsUse 274546\n",
"appleExerciseTimeGoal 0\n",
"appleExerciseTime 0\n",
"activeEnergyBurnedUnit 273745\n",
"appleMoveTime 0\n",
"DateOfBirth 274546\n",
"BloodType 0\n",
"appleStandHours 0\n",
"appleStandHoursGoal 0\n",
"activeEnergyBurnedGoal 0\n",
"appleMoveTimeGoal 0\n",
"dtype: int64\n"
]