merge the changes of dashboard with GAP

2026-05-04 14:46:47 +02:00
parent 90d411f086
commit 09808f1fd4
1 changed files with 321 additions and 0 deletions
@@ -867,6 +867,176 @@ fig.subplots_adjust(hspace=0.7)
 plt.show()
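 ##
 # NOTE: the two unresolved merge-conflict markers below are commented out so the file parses;
 # the "Updated upstream" side is empty, so the code that follows is the only content.
 # <<<<<<< Updated upstream
 # =======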
 # %% Dashboard Angepasst
 import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 import numpy as np
 from matplotlib.gridspec import GridSpec
 def to_numeric_comma(s: pd.Series) -> pd.Series:
    """Convert a Series to numbers, accepting ',' as well as '.' as decimal mark.

    Every value is stringified first, so both "1.5" and "1,5" parse to 1.5.
    Values that still cannot be parsed become NaN.
    """
    as_text = s.astype(str)
    dotted = as_text.str.replace(",", ".", regex=False)
    return pd.to_numeric(dotted, errors="coerce")
 # Load the tab-separated input table
 file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
 df = pd.read_csv(file_path, sep='\t')
 # For every 'result.*' column: drop the 'result.' part and turn spaces into underscores
 column_mapping = {
    col: col.replace('result.', '').replace(' ', '_')
    for col in df.columns
    if col.startswith('result.')
 }
 df = df.rename(columns=column_mapping)
 # Parse MedDatum safely: values that are not valid dates become NaT
 df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')
 # Select one patient and order the visits chronologically
 patient_id = '3d942c60'
 is_patient = df['unique_id'] == patient_id
 patient_data = df[is_patient].sort_values('MedDatum').copy()
 if patient_data.empty:
    raise ValueError(f"No data found for patient: {patient_id}")
 # Functional systems + EDSS
 edss_col = 'GT.EDSS'
 edss_title = 'EDSS'
 # (column name, panel title) pairs, in the order the small panels are drawn
 functional_systems = list({
    'GT.VISUAL_OPTIC_FUNCTIONS': 'Visual / Optic',
    'GT.CEREBELLAR_FUNCTIONS': 'Cerebellar',
    'GT.BRAINSTEM_FUNCTIONS': 'Brainstem',
    'GT.SENSORY_FUNCTIONS': 'Sensory',
    'GT.PYRAMIDAL_FUNCTIONS': 'Pyramidal (Motor)',
    'GT.AMBULATION': 'Ambulation',
    'GT.CEREBRAL_FUNCTIONS': 'Cerebral',
    'GT.BOWEL_AND_BLADDER_FUNCTIONS': 'Bowel & Bladder',
 }.items())
 # y-axis max rules: columns grouped by their upper axis limit
 ymax_by_col = {
    **dict.fromkeys(
        (
            'GT.PYRAMIDAL_FUNCTIONS',
            'GT.SENSORY_FUNCTIONS',
            'GT.BOWEL_AND_BLADDER_FUNCTIONS',
            'GT.VISUAL_OPTIC_FUNCTIONS',
        ),
        6,
    ),
    **dict.fromkeys(
        (
            'GT.CEREBELLAR_FUNCTIONS',
            'GT.CEREBRAL_FUNCTIONS',
            'GT.BRAINSTEM_FUNCTIONS',
        ),
        5,
    ),
    'GT.EDSS': 10,
 }
 # Fallback limit for columns without an explicit rule (e.g. GT.AMBULATION)
 default_ymax = 6
 # ---------- Build shared visit dates ticks ----------
 # Every visit date of the patient counts, not only dates with valid numeric values
 visit_dates = patient_data['MedDatum'].dropna().drop_duplicates()
 event_dates = sorted(visit_dates.tolist())
 max_ticks = 8
 if len(event_dates) > max_ticks:
    # Thin out to max_ticks evenly spaced positions; first and last date are kept
    idx = np.linspace(0, len(event_dates) - 1, max_ticks, dtype=int)
    event_dates = [event_dates[i] for i in idx]
 # Base timeline for plotting: one row per patient visit date, column named 'x'
 timeline = patient_data[['MedDatum']].dropna().drop_duplicates()
 timeline = timeline.sort_values('MedDatum')
 timeline = timeline.rename(columns={'MedDatum': 'x'})
 # ---------- A4 figure ----------
 fig = plt.figure(figsize=(11.69, 8.27))
 # Three rows: a taller top row followed by two equally sized rows, four columns each
 gs = GridSpec(
    nrows=3,
    ncols=4,
    figure=fig,
    height_ratios=[2.0, 1.0, 1.0],
    hspace=0.5,
    wspace=0.35,
 )
 def style_time_axis(ax, show_labels=True):
    """Apply the shared date ticks and tick formatting to *ax*.

    Ticks are placed at the module-level ``event_dates`` and rendered as
    ``YYYY-MM``, rotated by 30 degrees. With ``show_labels=False`` the bottom
    tick labels are hidden.
    """
    ax.set_xticks(event_dates)
    month_formatter = mdates.DateFormatter('%Y-%m')
    ax.xaxis.set_major_formatter(month_formatter)
    ax.tick_params(axis='x', rotation=30, labelsize=8, pad=2)
    if show_labels:
        return
    ax.tick_params(labelbottom=False)
 def get_plot_df(patient_data, col):
    """
    Build the per-date series to plot for one score column.

    Returns a frame with columns ``x`` (visit date) and ``y`` (numeric score)
    covering every date of the module-level ``timeline``. Dates without a
    numeric value keep NaN in ``y`` so matplotlib draws a gap instead of a zero.
    """
    scores = (
        patient_data[['MedDatum', col]]
        .copy()
        .rename(columns={'MedDatum': 'x', col: 'raw_y'})
    )
    scores['y'] = to_numeric_comma(scores['raw_y'])
    # Several rows on the same date collapse to their maximum score
    per_date = scores.groupby('x', as_index=False)['y'].max()
    # Left-merge onto the full timeline so dates without a score stay present
    return timeline.merge(per_date, on='x', how='left').sort_values('x')
 # ---------- EDSS main plot ----------
 # Top grid row, spanning all four columns
 ax_main = fig.add_subplot(gs[0, :])
 main_title_style = {'fontsize': 14, 'fontweight': 'bold'}
 ax_main.set_title(edss_title, **main_title_style)
 ax_main.set_ylabel("Score")
 ax_main.set_ylim(0, ymax_by_col.get(edss_col, default_ymax))
 ax_main.grid(True, alpha=0.3)
 if edss_col not in patient_data.columns:
    ax_main.set_title("EDSS (missing column GT.EDSS)", **main_title_style)
 else:
    plot_df = get_plot_df(patient_data, edss_col)
    if plot_df['y'].notna().any():
        # NaNs create visible gaps in the line
        ax_main.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=3, color='tab:red')
    else:
        ax_main.set_title("EDSS (no numeric data)", **main_title_style)
 style_time_axis(ax_main)
 # ---------- Small aligned plots ----------
 small_axes = []
 for k, (col, title) in enumerate(functional_systems):
    # Panels fill grid rows 1 and 2, four per row, sharing the x-axis of the EDSS plot
    row_offset, c = divmod(k, 4)
    r = 1 + row_offset
    ax = fig.add_subplot(gs[r, c], sharex=ax_main)
    small_axes.append(ax)
    ymax = ymax_by_col.get(col, default_ymax)
    ax.set_title(title, fontsize=10)
    ax.set_ylabel("Score")
    ax.set_ylim(0, ymax)
    ax.grid(True, alpha=0.3)
    if col not in patient_data.columns:
        ax.set_title(f"{title} (missing)", fontsize=10)
    else:
        plot_df = get_plot_df(patient_data, col)
        if plot_df['y'].notna().any():
            # NaNs remain in y -> line breaks where data is missing
            ax.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=2, color='tab:blue')
        else:
            ax.set_title(f"{title} (no numeric data)", fontsize=10)
    style_time_axis(ax)
 # The first four panels form the upper small row: hide their x tick labels
 for ax in small_axes[:4]:
    ax.tick_params(labelbottom=False)
 plt.tight_layout()
 # Applied after tight_layout so this vertical spacing is the one that sticks
 fig.subplots_adjust(hspace=0.7)
 plt.show()
 ##
 # >>>>>>> Stashed changes  (unresolved merge-conflict marker, commented out so the file parses)
 # %% Table 
 import pandas as pd
 import matplotlib.pyplot as plt
@@ -1982,6 +2152,157 @@ plt.savefig(figure_save_path, format='svg', bbox_inches='tight')
 plt.show()
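 ##
 # NOTE: the two unresolved merge-conflict markers below are commented out so the file parses;
 # the "Updated upstream" side is empty, so the code that follows is the only content.
 # <<<<<<< Updated upstream
 # =======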
 # %% Functional System + EDSS Error Boxplots
 import pandas as pd
 import matplotlib.pyplot as plt
 import os
 import numpy as np
 from matplotlib.patches import Patch
 from matplotlib.lines import Line2D
 # --- Configuration & Theme ---
 plt.rcParams['font.family'] = 'Arial'
 figure_save_path = 'project/visuals/functional_systems_edss_boxplot.svg'
 # ------------------------------------------------------------
 # functional_systems_to_plot is expected to hold
 # (ground-truth column, result column) pairs, e.g.:
 # [
 #   ('GT.VISUAL_OPTIC_FUNCTIONS', 'result.VISUAL_OPTIC_FUNCTIONS'),
 #   ...
 # ]
 #
 # The EDSS pair is appended as the last entry:
 # ------------------------------------------------------------
 all_systems_to_plot = [
    *functional_systems_to_plot,
    ('GT.EDSS', 'result.EDSS'),
 ]
 # --- 1. Build error data for boxplots ---
 boxplot_data = []
 system_labels = []
 sample_sizes = []
 for gt_col, res_col in all_systems_to_plot:
    # A pair is only usable when both of its columns exist in df
    if not (gt_col in df.columns and res_col in df.columns):
        print(f"Skipping missing columns: {gt_col}, {res_col}")
        continue
    # Part after the first dot, e.g. 'GT.EDSS' -> 'EDSS'
    sys_name = gt_col.split('.')[1]
    # Robust parsing
    gt = df[gt_col].apply(safe_parse)
    res = df[res_col].apply(safe_parse)
    # Error = result - ground truth; rows where either side is missing drop out
    error = (res - gt).dropna()
    # Exact matches (error 0) are ignored
    error = error[error != 0]
    # Systems without any non-zero error get no box
    if len(error) == 0:
        continue
    clean_name = 'EDSS' if sys_name == 'EDSS' else sys_name.replace('_', ' ').title()
    boxplot_data.append(error.values)
    system_labels.append(clean_name)
    sample_sizes.append(len(error))
 # Safety check
 if not boxplot_data:
    raise ValueError("No valid non-zero error data available for any functional system or EDSS.")
 # Put n into x-axis labels so it doesn't overlap the plot
 xtick_labels = [
    f"{label}\n(n={n})"
    for label, n in zip(system_labels, sample_sizes)
 ]
 # --- 2. Plotting ---
 fig, ax = plt.subplots(figsize=(15, 8))
 # One box per entry of boxplot_data, with filled boxes and the mean drawn as a
 # marker. Vertical orientation is the default, so the `vert` flag (deprecated in
 # newer Matplotlib releases) is not passed.
 bp = ax.boxplot(
    boxplot_data,
    patch_artist=True,
    showmeans=True,
    meanline=False
 )
 # boxplot's `labels=` keyword is deprecated since Matplotlib 3.9 (renamed to
 # `tick_labels`). Setting the labels on the axis itself works on old and new
 # versions alike; boxplot has already placed one tick per box.
 ax.set_xticklabels(xtick_labels)
 # --- 3. Styling ---
 box_face = '#D6EAF8'
 box_edge = '#2980B9'
 whisker_col = '#7F8C8D'
 median_col = '#C0392B'
 mean_col = '#1ABC9C'
 flier_face = '#95A5A6'
 flier_edge = '#7F8C8D'
 # Properties applied to every artist of each boxplot part (keys of `bp`)
 artist_styles = [
    ('boxes', dict(facecolor=box_face, edgecolor=box_edge, linewidth=1.5)),
    ('whiskers', dict(color=whisker_col, linewidth=1.2)),
    ('caps', dict(color=whisker_col, linewidth=1.2)),
    ('medians', dict(color=median_col, linewidth=2)),
    ('means', dict(marker='o', markerfacecolor=mean_col, markeredgecolor='black', markersize=6)),
    ('fliers', dict(marker='o', markerfacecolor=flier_face, markeredgecolor=flier_edge,
                    alpha=0.6, markersize=4)),
 ]
 for part, props in artist_styles:
    for artist in bp[part]:
        artist.set(**props)
 # Reference line at zero error
 ax.axhline(0, color='black', linewidth=1.2, linestyle='--')
 # Labels and formatting
 axis_label_style = {'fontsize': 11, 'fontweight': 'bold'}
 ax.set_xlabel('Functional System / EDSS', **axis_label_style)
 ax.set_ylabel('Error (Result - Ground Truth)', **axis_label_style)
 # Rotate x labels for readability
 plt.xticks(rotation=45, ha='right')
 # Light horizontal grid; top and right spines are hidden
 ax.yaxis.grid(True, linestyle='--', alpha=0.3)
 for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
 # --- 4. Legend above the plot, outside the axes ---
 # Proxy artists: one handle per visual element of the boxplot
 iqr_handle = Patch(facecolor=box_face, edgecolor=box_edge, label='IQR (25th-75th percentile)')
 median_handle = Line2D([0], [0], color=median_col, lw=2, label='Median')
 mean_handle = Line2D(
    [0], [0], marker='o', color='w', markerfacecolor=mean_col,
    markeredgecolor='black', markersize=7, label='Mean',
 )
 outlier_handle = Line2D(
    [0], [0], marker='o', color='w', markerfacecolor=flier_face,
    markeredgecolor=flier_edge, alpha=0.8, markersize=6, label='Outlier',
 )
 zero_handle = Line2D([0], [0], color='black', lw=1.2, linestyle='--', label='Zero error reference')
 legend_handles = [iqr_handle, median_handle, mean_handle, outlier_handle, zero_handle]
 # Anchored just above the axes (y=1.02 in axes coordinates), laid out in three columns
 ax.legend(handles=legend_handles, loc='lower center', bbox_to_anchor=(0.5, 1.02),
           ncol=3, frameon=False)
 # Leave room at the top for the legend
 plt.tight_layout(rect=[0, 0, 1, 0.90])
 # Optional save: create the target directory first, then write the SVG
 save_dir = os.path.dirname(figure_save_path)
 os.makedirs(save_dir, exist_ok=True)
 plt.savefig(figure_save_path, format='svg', bbox_inches='tight')
 plt.show()
 ##
 # >>>>>>> Stashed changes  (unresolved merge-conflict marker, commented out so the file parses)
 # %% test
 # Diagnose: what are the actual differences?