merge the changes of dashboard with GAP

This commit is contained in:
2026-05-04 14:46:47 +02:00
parent 90d411f086
commit 09808f1fd4
+321
View File
@@ -867,6 +867,176 @@ fig.subplots_adjust(hspace=0.7)
plt.show() plt.show()
## ##
# %% Dashboard (adapted)
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
from matplotlib.gridspec import GridSpec
def to_numeric_comma(s: pd.Series) -> pd.Series:
    """Convert a Series of number-like values to a numeric Series.

    Both ``1.5`` and ``1,5`` are accepted: every comma is replaced by a dot
    before parsing. Values that still cannot be parsed become NaN instead of
    raising.

    Args:
        s: Series holding numbers or their string representations.

    Returns:
        A numeric Series with the same index as ``s``.
    """
    # Strip explicitly so padded cells do not depend on the parser's own
    # whitespace handling.
    text = s.astype(str).str.strip().str.replace(",", ".", regex=False)
    return pd.to_numeric(text, errors="coerce")
# Load the data (tab-separated table).
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t')

# Rename columns: drop the leading 'result.' prefix and replace spaces with
# underscores. Only the prefix is removed; a later 'result.' inside the name
# is left untouched (str.replace would have removed every occurrence).
result_prefix = 'result.'
column_mapping = {
    col: col[len(result_prefix):].replace(' ', '_')
    for col in df.columns
    if col.startswith(result_prefix)
}
df = df.rename(columns=column_mapping)

# Parse MedDatum safely: values that cannot be parsed become NaT.
df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')
# Select the rows of one patient, ordered by visit date.
patient_id = '3d942c60'
patient_data = df.loc[df['unique_id'] == patient_id]
patient_data = patient_data.sort_values('MedDatum').copy()

# Stop early: nothing below can be plotted without rows for this patient.
if patient_data.empty:
    raise ValueError(f"No data found for patient: {patient_id}")
# Column and title of the main (EDSS) panel.
edss_col, edss_title = 'GT.EDSS', 'EDSS'

# (column name, panel title) pairs, in the order the small panels are drawn.
functional_systems = [
    ('GT.VISUAL_OPTIC_FUNCTIONS', 'Visual / Optic'),
    ('GT.CEREBELLAR_FUNCTIONS', 'Cerebellar'),
    ('GT.BRAINSTEM_FUNCTIONS', 'Brainstem'),
    ('GT.SENSORY_FUNCTIONS', 'Sensory'),
    ('GT.PYRAMIDAL_FUNCTIONS', 'Pyramidal (Motor)'),
    ('GT.AMBULATION', 'Ambulation'),
    ('GT.CEREBRAL_FUNCTIONS', 'Cerebral'),
    ('GT.BOWEL_AND_BLADDER_FUNCTIONS', 'Bowel & Bladder'),
]

# Upper y-axis limit per column; columns not listed here use default_ymax.
ymax_by_col = {
    **dict.fromkeys(
        (
            'GT.PYRAMIDAL_FUNCTIONS',
            'GT.SENSORY_FUNCTIONS',
            'GT.BOWEL_AND_BLADDER_FUNCTIONS',
            'GT.VISUAL_OPTIC_FUNCTIONS',
        ),
        6,
    ),
    **dict.fromkeys(
        (
            'GT.CEREBELLAR_FUNCTIONS',
            'GT.CEREBRAL_FUNCTIONS',
            'GT.BRAINSTEM_FUNCTIONS',
        ),
        5,
    ),
    'GT.EDSS': 10,
}
default_ymax = 6
# ---------- Shared x-axis ticks and plotting timeline ----------
# Ticks are taken from ALL visit dates of the patient, including visits
# without a valid numeric score.
event_dates = patient_data['MedDatum'].dropna().drop_duplicates().tolist()
event_dates.sort()

# Thin the ticks to at most max_ticks evenly spaced visits.
max_ticks = 8
if len(event_dates) > max_ticks:
    idx = np.linspace(0, len(event_dates) - 1, max_ticks, dtype=int)
    event_dates = [event_dates[i] for i in idx]

# Base timeline for plotting: one row per distinct visit date, in column 'x'.
timeline = patient_data[['MedDatum']].dropna().drop_duplicates()
timeline = timeline.sort_values('MedDatum').rename(columns={'MedDatum': 'x'})
# ---------- A4-sized figure (inches), wider than tall ----------
fig = plt.figure(figsize=(11.69, 8.27))

# 3 x 4 grid; the first row is twice as tall as each of the two rows below.
gs = GridSpec(
    nrows=3,
    ncols=4,
    figure=fig,
    height_ratios=[2.0, 1.0, 1.0],
    hspace=0.5,
    wspace=0.35,
)
def style_time_axis(ax, show_labels=True):
    """Apply the shared date formatting to the x-axis of ``ax``.

    Ticks are placed at the module-level ``event_dates`` and rendered as
    ``YYYY-MM``, rotated by 30 degrees in a small font.

    Args:
        ax: Axes whose x-axis is styled.
        show_labels: When False, the bottom tick labels are hidden.
    """
    ax.set_xticks(event_dates)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax.tick_params(axis='x', rotation=30, labelsize=8, pad=2)
    if show_labels:
        return
    ax.tick_params(labelbottom=False)
def get_plot_df(patient_data, col, timeline=None):
    """Build the per-visit series that is plotted for one score column.

    All visit dates are kept. Missing or non-numeric values stay NaN, so
    matplotlib draws gaps instead of zeros.

    Args:
        patient_data: Frame with a ``MedDatum`` date column and the column
            ``col``.
        col: Name of the score column to plot.
        timeline: Optional frame with a date column ``x`` that defines the
            rows of the result. When omitted it is derived from the distinct,
            non-missing ``MedDatum`` values of ``patient_data``, so the
            result always matches the frame that was passed in rather than a
            module-level timeline built for another selection.

    Returns:
        DataFrame with columns ``x`` (visit date) and ``y`` (numeric score or
        NaN), one row per timeline date, sorted by ``x``.
    """
    if timeline is None:
        timeline = (
            patient_data[['MedDatum']]
            .dropna()
            .drop_duplicates()
            .sort_values('MedDatum')
            .rename(columns={'MedDatum': 'x'})
        )

    values = patient_data[['MedDatum', col]].copy()
    values = values.rename(columns={'MedDatum': 'x', col: 'raw_y'})
    values['y'] = to_numeric_comma(values['raw_y'])

    # Several rows on the same date collapse to their maximum score.
    per_date = values.groupby('x', as_index=False)['y'].max()

    # Left-merge onto the timeline so every visit date stays visible.
    return timeline.merge(per_date, on='x', how='left').sort_values('x')
# ---------- EDSS main plot (spans the whole first grid row) ----------
ax_main = fig.add_subplot(gs[0, :])
ax_main.set_title(edss_title, fontsize=14, fontweight='bold')
ax_main.set_ylabel("Score")
ax_main.set_ylim(0, ymax_by_col.get(edss_col, default_ymax))
ax_main.grid(True, alpha=0.3)

if edss_col not in patient_data.columns:
    # Column absent: keep the empty panel and say so in the title.
    ax_main.set_title("EDSS (missing column GT.EDSS)", fontsize=14, fontweight='bold')
else:
    plot_df = get_plot_df(patient_data, edss_col)
    if plot_df['y'].notna().any():
        # NaN entries break the line, leaving visible gaps.
        ax_main.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=3, color='tab:red')
    else:
        ax_main.set_title("EDSS (no numeric data)", fontsize=14, fontweight='bold')

style_time_axis(ax_main)
# ---------- Small panels, x-axis shared with the EDSS panel ----------
small_axes = []
for k, (col, title) in enumerate(functional_systems):
    # Four panels per row, starting in the grid row below the main plot.
    row_offset, grid_col = divmod(k, 4)
    ax = fig.add_subplot(gs[1 + row_offset, grid_col], sharex=ax_main)
    small_axes.append(ax)

    ymax = ymax_by_col.get(col, default_ymax)
    ax.set_title(title, fontsize=10)
    ax.set_ylabel("Score")
    ax.set_ylim(0, ymax)
    ax.grid(True, alpha=0.3)

    if col not in patient_data.columns:
        ax.set_title(f"{title} (missing)", fontsize=10)
    else:
        plot_df = get_plot_df(patient_data, col)
        if plot_df['y'].notna().any():
            # NaN stays in y, so the line breaks where data is missing.
            ax.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=2, color='tab:blue')
        else:
            ax.set_title(f"{title} (no numeric data)", fontsize=10)

    style_time_axis(ax)

# Hide x tick labels on the first row of small plots.
for first_row_ax in small_axes[:4]:
    first_row_ax.tick_params(labelbottom=False)

plt.tight_layout()
# Applied after tight_layout to set the final vertical spacing between rows.
fig.subplots_adjust(hspace=0.7)
plt.show()
##
# %% Table # %% Table
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
@@ -1982,6 +2152,157 @@ plt.savefig(figure_save_path, format='svg', bbox_inches='tight')
plt.show() plt.show()
## ##
# %% Functional System + EDSS Error Boxplots
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
# --- Configuration & theme ---
plt.rcParams['font.family'] = 'Arial'
figure_save_path = 'project/visuals/functional_systems_edss_boxplot.svg'

# functional_systems_to_plot is expected to hold (ground-truth column,
# result column) pairs, for example:
#     ('GT.VISUAL_OPTIC_FUNCTIONS', 'result.VISUAL_OPTIC_FUNCTIONS')
# The EDSS pair is appended so it is drawn as the last box.
all_systems_to_plot = [*functional_systems_to_plot, ('GT.EDSS', 'result.EDSS')]
# --- 1. Collect the error values for each box ---
boxplot_data = []
system_labels = []
sample_sizes = []

for gt_col, res_col in all_systems_to_plot:
    # A pair is only usable when both of its columns exist.
    if not (gt_col in df.columns and res_col in df.columns):
        print(f"Skipping missing columns: {gt_col}, {res_col}")
        continue

    sys_name = gt_col.split('.')[1]

    # Parse both columns with the shared helper.
    gt = df[gt_col].apply(safe_parse)
    res = df[res_col].apply(safe_parse)

    # Error = result - ground truth; rows with a missing side are dropped,
    # and exact-zero errors are excluded.
    error = (res - gt).dropna()
    error = error[error != 0]

    # Systems without any non-zero error get no box.
    if len(error) == 0:
        continue

    clean_name = 'EDSS' if sys_name == 'EDSS' else sys_name.replace('_', ' ').title()
    boxplot_data.append(error.values)
    system_labels.append(clean_name)
    sample_sizes.append(len(error))

# Safety check: there must be at least one box to draw.
if not boxplot_data:
    raise ValueError("No valid non-zero error data available for any functional system or EDSS.")

# Put the sample size into the x tick label so it does not overlap the plot.
xtick_labels = [f"{label}\n(n={n})" for label, n in zip(system_labels, sample_sizes)]
# --- 2. Plotting ---
fig, ax = plt.subplots(figsize=(15, 8))

# Vertical boxes are the default, so no orientation argument is passed.
bp = ax.boxplot(
    boxplot_data,
    patch_artist=True,  # boxes are patches, so they can take a face colour
    showmeans=True,
    meanline=False,     # mean is drawn as a marker, not as a line
)

# Tick labels are set on the axes instead of through boxplot(labels=...):
# that keyword was renamed to `tick_labels` in Matplotlib 3.9 and the old
# name is deprecated, while set_xticklabels works on old and new versions.
ax.set_xticklabels(xtick_labels)
# --- 3. Styling ---
box_face = '#D6EAF8'
box_edge = '#2980B9'
whisker_col = '#7F8C8D'
median_col = '#C0392B'
mean_col = '#1ABC9C'
flier_face = '#95A5A6'
flier_edge = '#7F8C8D'

# Properties applied to every artist of each group returned by boxplot().
artist_styles = {
    'boxes': dict(facecolor=box_face, edgecolor=box_edge, linewidth=1.5),
    'whiskers': dict(color=whisker_col, linewidth=1.2),
    'caps': dict(color=whisker_col, linewidth=1.2),
    'medians': dict(color=median_col, linewidth=2),
    'means': dict(marker='o', markerfacecolor=mean_col, markeredgecolor='black', markersize=6),
    'fliers': dict(marker='o', markerfacecolor=flier_face, markeredgecolor=flier_edge,
                   alpha=0.6, markersize=4),
}
for group, props in artist_styles.items():
    for artist in bp[group]:
        artist.set(**props)

# Reference line at zero error.
ax.axhline(0, color='black', linewidth=1.2, linestyle='--')

# Axis labels.
ax.set_xlabel('Functional System / EDSS', fontsize=11, fontweight='bold')
ax.set_ylabel('Error (Result - Ground Truth)', fontsize=11, fontweight='bold')

# Rotate x labels for readability.
plt.xticks(rotation=45, ha='right')

# Light horizontal grid; top and right spines are hidden.
ax.yaxis.grid(True, linestyle='--', alpha=0.3)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
# --- 4. Legend above the plot, outside the axes ---
# Proxy artists: the legend entries are built by hand so they mirror the
# colours used for the boxplot elements.
legend_handles = [
    Patch(facecolor=box_face, edgecolor=box_edge, label='IQR (25th-75th percentile)'),
    Line2D([0], [0], color=median_col, lw=2, label='Median'),
    Line2D([0], [0], marker='o', color='w', markerfacecolor=mean_col,
           markeredgecolor='black', markersize=7, label='Mean'),
    Line2D([0], [0], marker='o', color='w', markerfacecolor=flier_face,
           markeredgecolor=flier_edge, alpha=0.8, markersize=6, label='Outlier'),
    Line2D([0], [0], color='black', lw=1.2, linestyle='--', label='Zero error reference'),
]
ax.legend(
    handles=legend_handles,
    loc='lower center',
    bbox_to_anchor=(0.5, 1.02),  # anchored just above the top edge of the axes
    ncol=3,
    frameon=False,
)

# Leave room at the top for the legend.
plt.tight_layout(rect=[0, 0, 1, 0.90])

# Save the figure. The target directory is created first; a path without a
# directory component is skipped because os.makedirs('') raises.
save_dir = os.path.dirname(figure_save_path)
if save_dir:
    os.makedirs(save_dir, exist_ok=True)
plt.savefig(figure_save_path, format='svg', bbox_inches='tight')
plt.show()
##
# %% test # %% test
# Diagnose: what are the actual differences? # Diagnose: what are the actual differences?