Config Dashboard

This commit is contained in:
2026-02-23 18:19:50 +01:00
parent 118e3e63b3
commit 816c50e467
2 changed files with 124 additions and 97 deletions

View File

@@ -718,128 +718,155 @@ plt.show()
##
# %% Dashboard
# %% Dashboard
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import matplotlib.dates as mdates
import numpy as np
from matplotlib.gridspec import GridSpec
def to_numeric_comma(s: pd.Series) -> pd.Series:
    """Convert a Series of number-like values to numeric dtype.

    Accepts both ``1.5`` and ``1,5`` as decimal notation: every value is
    stringified, each comma is replaced by a dot (literal replacement, not
    a regex), and the result is parsed with ``pd.to_numeric``.

    Args:
        s: Series whose values should be interpreted as numbers.

    Returns:
        A numeric Series; values that cannot be parsed become NaN because
        parsing uses ``errors="coerce"`` instead of raising.
    """
    # Normalise the decimal separator before parsing.
    normalized = s.astype(str).str.replace(",", ".", regex=False)
    return pd.to_numeric(normalized, errors="coerce")
# Load the data
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t')
# Rename columns to remove 'result.' prefix and handle spaces
# Rename columns to remove 'result.' prefix and replace spaces
column_mapping = {}
for col in df.columns:
if col.startswith('result.'):
new_name = col.replace('result.', '')
# Handle spaces in column names (replace with underscores if needed)
new_name = new_name.replace(' ', '_')
new_name = col.replace('result.', '').replace(' ', '_')
column_mapping[col] = new_name
df = df.rename(columns=column_mapping)
# Convert MedDatum to datetime
df['MedDatum'] = pd.to_datetime(df['MedDatum'])
# Parse MedDatum safely
df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')
# Check what columns actually exist in the dataset
print("Available columns:")
print(df.columns.tolist())
print("\nFirst few rows:")
print(df.head())
# Patient
patient_id = '6389d658'
patient_data = df[df['unique_id'] == patient_id].sort_values('MedDatum').copy()
if patient_data.empty:
raise ValueError(f"No data found for patient: {patient_id}")
# Hardcode specific patient names
patient_names = ['2bf8486d']
# Functional systems + EDSS
edss_col, edss_title = ('GT.EDSS', 'EDSS')
# Define the functional systems (columns to plot) - adjust based on actual column names
functional_systems = ['EDSS', 'Visual', 'Sensory', 'Motor', 'Brainstem', 'Cerebellar', 'Autonomic', 'Bladder', 'Intellectual']
functional_systems = [
('GT.VISUAL_OPTIC_FUNCTIONS', 'Visual / Optic'),
('GT.CEREBELLAR_FUNCTIONS', 'Cerebellar'),
('GT.BRAINSTEM_FUNCTIONS', 'Brainstem'),
('GT.SENSORY_FUNCTIONS', 'Sensory'),
('GT.PYRAMIDAL_FUNCTIONS', 'Pyramidal (Motor)'),
('GT.AMBULATION', 'Ambulation'),
('GT.CEREBRAL_FUNCTIONS', 'Cerebral'),
('GT.BOWEL_AND_BLADDER_FUNCTIONS', 'Bowel & Bladder'),
]
# Create subplots horizontally (2 columns, adjust rows as needed)
num_plots = len(functional_systems)
num_cols = 2
num_rows = (num_plots + num_cols - 1) // num_cols # Ceiling division
# y-axis max rules
ymax_by_col = {
'GT.PYRAMIDAL_FUNCTIONS': 6,
'GT.SENSORY_FUNCTIONS': 6,
'GT.BOWEL_AND_BLADDER_FUNCTIONS': 6,
'GT.VISUAL_OPTIC_FUNCTIONS': 6,
'GT.CEREBELLAR_FUNCTIONS': 5,
'GT.CEREBRAL_FUNCTIONS': 5,
'GT.BRAINSTEM_FUNCTIONS': 5,
'GT.EDSS': 10,
}
default_ymax = 6
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 4*num_rows), sharex=False) # Changed sharex=False
if num_plots == 1:
axes = [axes]
elif num_rows == 1:
axes = axes
else:
axes = axes.flatten()
# ---------- Build shared "event dates" ticks ----------
cols_for_dates = [edss_col] + [c for c, _ in functional_systems]
event_dates = []
# Plot for the hardcoded patient
for i, system in enumerate(functional_systems):
# Filter data for this specific patient
patient_data = df[df['unique_id'] == patient_names[0]].sort_values('MedDatum')
for c in cols_for_dates:
if c in patient_data.columns:
y = to_numeric_comma(patient_data[c]) # <-- changed
x = patient_data['MedDatum']
tmp = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"])
event_dates.extend(tmp["x"].tolist())
# Check if patient data exists
if patient_data.empty:
print(f"No data found for patient: {patient_names[0]}")
continue
event_dates = sorted(pd.Series(event_dates).drop_duplicates().tolist())
# Check if the system column exists in the data
if system in patient_data.columns:
# Plot the specific functional system
if not patient_data[system].isna().all():
axes[i].plot(patient_data['MedDatum'], patient_data[system], marker='o', linewidth=2, label=system)
axes[i].set_ylabel('Score')
axes[i].set_title(f'Functional System: {system}')
axes[i].grid(True, alpha=0.3)
axes[i].legend()
else:
axes[i].set_title(f'Functional System: {system} (No data)')
axes[i].set_ylabel('Score')
axes[i].grid(True, alpha=0.3)
max_ticks = 8
if len(event_dates) > max_ticks:
idx = np.linspace(0, len(event_dates) - 1, max_ticks, dtype=int)
event_dates = [event_dates[i] for i in idx]
# ---------- A4 figure ----------
fig = plt.figure(figsize=(11.69, 8.27))
gs = GridSpec(nrows=3, ncols=4, figure=fig, height_ratios=[2.0, 1.0, 1.0], hspace=0.5, wspace=0.35)
def style_time_axis(ax, show_labels=True):
    """Apply the shared date ticks and tick styling to ``ax``.

    Tick positions come from the module-level ``event_dates`` list, and
    labels are formatted as ``%Y-%m`` via ``mdates.DateFormatter``.

    Args:
        ax: Axes object whose x-axis is configured.
        show_labels: When False, the bottom tick labels are hidden while
            the tick positions stay in place.
    """
    ax.set_xticks(event_dates)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax.tick_params(axis='x', rotation=30, labelsize=8, pad=2)
    if not show_labels:
        ax.tick_params(labelbottom=False)
# ---------- EDSS main plot ----------
ax_main = fig.add_subplot(gs[0, :])
if edss_col in patient_data.columns:
y = to_numeric_comma(patient_data[edss_col]) # <-- changed
x = patient_data['MedDatum']
plot_df = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]).sort_values("x")
ax_main.set_title(edss_title, fontsize=14, fontweight='bold')
ax_main.set_ylabel("Score")
ax_main.set_ylim(0, ymax_by_col.get(edss_col, default_ymax))
ax_main.grid(True, alpha=0.3)
if not plot_df.empty:
ax_main.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=3, color='tab:red')
else:
# Try to find column with similar name (case insensitive)
found_column = None
for col in df.columns:
if system.lower() in col.lower():
found_column = col
break
ax_main.set_title("EDSS (no numeric data)", fontsize=14, fontweight='bold')
else:
ax_main.set_title("EDSS (missing column GT.EDSS)", fontsize=14, fontweight='bold')
ax_main.set_ylim(0, ymax_by_col.get(edss_col, 10))
ax_main.grid(True, alpha=0.3)
if found_column:
print(f"Found similar column: {found_column}")
if not patient_data[found_column].isna().all():
axes[i].plot(patient_data['MedDatum'], patient_data[found_column], marker='o', linewidth=2, label=found_column)
axes[i].set_ylabel('Score')
axes[i].set_title(f'Functional System: {system} (found as: {found_column})')
axes[i].grid(True, alpha=0.3)
axes[i].legend()
style_time_axis(ax_main)
# ---------- Small aligned plots ----------
small_axes = []
for k, (col, title) in enumerate(functional_systems):
r = 1 + (k // 4)
c = (k % 4)
ax = fig.add_subplot(gs[r, c], sharex=ax_main)
small_axes.append(ax)
ymax = ymax_by_col.get(col, default_ymax)
ax.set_title(title, fontsize=10)
ax.set_ylabel("Score")
ax.set_ylim(0, ymax)
ax.grid(True, alpha=0.3)
if col in patient_data.columns:
y = to_numeric_comma(patient_data[col]) # <-- changed
x = patient_data['MedDatum']
plot_df = pd.DataFrame({"x": x, "y": y}).dropna(subset=["x", "y"]).sort_values("x")
if not plot_df.empty:
ax.plot(plot_df["x"], plot_df["y"], marker='o', linewidth=2, color='tab:blue')
else:
axes[i].set_title(f'Functional System: {system} (Column not found)')
axes[i].set_ylabel('Score')
axes[i].grid(True, alpha=0.3)
ax.set_title(f"{title} (no data)", fontsize=10)
else:
ax.set_title(f"{title} (missing)", fontsize=10)
# Hide empty subplots
for i in range(len(functional_systems), len(axes)):
axes[i].set_visible(False)
style_time_axis(ax)
# Set x-axis label for the last row only
for i in range(len(functional_systems)):
if i >= len(axes) - num_cols: # Last row
axes[i].set_xlabel('Date')
# Force date formatting on all axes
for ax in axes:
ax.tick_params(axis='x', rotation=45)
ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(plt.matplotlib.dates.MonthLocator())
# Automatically format x-axis dates
plt.gcf().autofmt_xdate()
# Hide x tick labels on first row of small plots
for ax in small_axes[:4]:
ax.tick_params(labelbottom=False)
plt.tight_layout()
fig.subplots_adjust(hspace=0.7)
plt.show()
##
# %% Table
import pandas as pd
import matplotlib.pyplot as plt