Modifications

2026-04-27 11:52:53 +02:00
parent 816c50e467
commit 90d411f086
3 changed files with 446 additions and 98 deletions
@@ -401,7 +401,7 @@ import seaborn as sns
 import numpy as np

 # Load your data from TSV file
-file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_results_unique.tsv'
+file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/results/join_results_unique.tsv'
 df = pd.read_csv(file_path, sep='\t')

 # Replace comma with dot for numeric conversion in GT.EDSS and result.EDSS
@@ -745,7 +745,7 @@ df = df.rename(columns=column_mapping)
 df['MedDatum'] = pd.to_datetime(df['MedDatum'], errors='coerce')

 # Patient
-patient_id = '6389d658'
+patient_id = 'd13e4aa3'
 patient_data = df[df['unique_id'] == patient_id].sort_values('MedDatum').copy()
 if patient_data.empty:
    raise ValueError(f"No data found for patient: {patient_id}")
@@ -1764,100 +1764,7 @@ plt.show()



-# %% Difference Gemini easy
-
-
-# --- 1. Process Error Data ---
-system_names = [name.split('.')[1] for name, _ in functional_systems_to_plot]
-plot_list = []
-
-for gt_col, res_col in functional_systems_to_plot:
-    sys_name = gt_col.split('.')[1]
-
-    # Robust parsing
-    gt = df[gt_col].apply(safe_parse)
-    res = df[res_col].apply(safe_parse)
-    error = res - gt
-
-    # Calculate counts
-    matches = (error == 0).sum()
-    under = (error < 0).sum()
-    over = (error > 0).sum()
-    total = error.dropna().count()
-
-    # Calculate Percentages
-    # Using max(total, 1) to avoid division by zero
-    divisor = max(total, 1)
-    match_pct = (matches / divisor) * 100
-    under_pct = (under / divisor) * 100
-    over_pct = (over / divisor) * 100
-
-    plot_list.append({
-        'System': sys_name.replace('_', ' ').title(),
-        'Matches': matches,
-        'MatchPct': match_pct,
-        'Under': under,
-        'UnderPct': under_pct,
-        'Over': over,
-        'OverPct': over_pct
-    })
-
-stats_df = pd.DataFrame(plot_list)
-
-# --- 2. Plotting ---
-fig, ax = plt.subplots(figsize=(12, 8)) # Slightly taller for multi-line labels
-
-color_under = '#E74C3C'
-color_over = '#3498DB'
-bar_height = 0.6
-
-y_pos = np.arange(len(stats_df))
-
-ax.barh(y_pos, -stats_df['Under'], bar_height, label='Under-scored', color=color_under, edgecolor='white', alpha=0.8)
-ax.barh(y_pos, stats_df['Over'], bar_height, label='Over-scored', color=color_over, edgecolor='white', alpha=0.8)
-
-# --- 3. Aesthetics & Labels ---
-
-for i, row in stats_df.iterrows():
-    # Constructing a detailed label for the left side
-    # Matches (Bold) | Under % | Over %
-    label_text = (
-        f"$\mathbf{{{row['System']}}}$\n"
-        f"Matches: {int(row['Matches'])} ({row['MatchPct']:.1f}%)\n"
-        f"Under: {int(row['Under'])} ({row['UnderPct']:.1f}%) | Over: {int(row['Over'])} ({row['OverPct']:.1f}%)"
-    )
-
-    # Position text to the left of the x=0 line
-    ax.text(ax.get_xlim()[0] - 0.5, i, label_text, va='center', ha='right', fontsize=9, color='#333333', linespacing=1.3)
-
-# Zero line
-ax.axvline(0, color='black', linewidth=1.2, alpha=0.7)
-
-# Clean up axes
-ax.set_yticks([])
-ax.set_xlabel('Number of Patients with Error', fontsize=11, fontweight='bold', labelpad=10)
-#ax.set_title('Directional Error Analysis by Functional System', fontsize=14, pad=30)
-
-# Make X-axis labels absolute
-ax.set_xticklabels([int(abs(tick)) for tick in ax.get_xticks()])
-
-# Remove spines
-for spine in ['top', 'right', 'left']:
-    ax.spines[spine].set_visible(False)
-
-# Legend
-ax.legend(loc='upper right', frameon=False, bbox_to_anchor=(1, 1.1))
-
-# Grid
-ax.xaxis.grid(True, linestyle='--', alpha=0.3)
-
-plt.tight_layout()
-plt.show()
-##
-
-
-
-# %% name
+# %% Difference Plot Gemini
 import pandas as pd
 import matplotlib.pyplot as plt
 import os
@@ -1946,6 +1853,136 @@ ax.legend(loc='upper right', frameon=False, bbox_to_anchor=(1, 1.1), ncol=2)
 plt.tight_layout()
 plt.show()
 ##
+
+
+# %% Functional System Error Boxplots
+import pandas as pd
+import matplotlib.pyplot as plt
+import os
+import numpy as np
+from matplotlib.patches import Patch
+from matplotlib.lines import Line2D
+
+# --- Configuration & Theme ---
+plt.rcParams['font.family'] = 'Arial'
+figure_save_path = 'project/visuals/functional_systems_boxplot.svg'
+
+# --- 1. Build error data for boxplots ---
+boxplot_data = []
+system_labels = []
+sample_sizes = []
+
+for gt_col, res_col in functional_systems_to_plot:
+    sys_name = gt_col.split('.')[1]
+
+    # Robust parsing
+    gt = df[gt_col].apply(safe_parse)
+    res = df[res_col].apply(safe_parse)
+
+    # Error = result - ground truth
+    error = (res - gt).dropna()
+
+    # Drop exact matches (error == 0) so only non-zero errors are plotted
+    error = error[error != 0]
+
+    # Keep only systems that have at least one non-zero error
+    if len(error) > 0:
+        clean_name = sys_name.replace('_', ' ').title()
+        boxplot_data.append(error.values)
+        system_labels.append(clean_name)
+        sample_sizes.append(len(error))
+
+# Safety check
+if not boxplot_data:
+    raise ValueError("No valid non-zero error data available for any functional system.")
+
+# Append the sample size (n) to each x-axis label instead of annotating inside the plot
+xtick_labels = [f"{label}\n(n={n})" for label, n in zip(system_labels, sample_sizes)]
+
+# --- 2. Plotting ---
+fig, ax = plt.subplots(figsize=(14, 8))
+
+bp = ax.boxplot(
+    boxplot_data,
+    vert=True,
+    patch_artist=True,
+    labels=xtick_labels,
+    showmeans=True,
+    meanline=False
+)
+
+# --- 3. Styling ---
+box_face = '#D6EAF8'
+box_edge = '#2980B9'
+whisker_col = '#7F8C8D'
+median_col = '#C0392B'
+mean_col = '#1ABC9C'
+flier_face = '#95A5A6'
+flier_edge = '#7F8C8D'
+
+for box in bp['boxes']:
+    box.set(facecolor=box_face, edgecolor=box_edge, linewidth=1.5)
+
+for whisker in bp['whiskers']:
+    whisker.set(color=whisker_col, linewidth=1.2)
+
+for cap in bp['caps']:
+    cap.set(color=whisker_col, linewidth=1.2)
+
+for median in bp['medians']:
+    median.set(color=median_col, linewidth=2)
+
+for mean in bp['means']:
+    mean.set(marker='o', markerfacecolor=mean_col, markeredgecolor='black', markersize=6)
+
+for flier in bp['fliers']:
+    flier.set(marker='o', markerfacecolor=flier_face, markeredgecolor=flier_edge, alpha=0.6, markersize=4)
+
+# Reference line at zero error
+ax.axhline(0, color='black', linewidth=1.2, linestyle='--')
+
+# Labels and formatting
+ax.set_xlabel('Functional System', fontsize=11, fontweight='bold')
+ax.set_ylabel('Error (Result - Ground Truth)', fontsize=11, fontweight='bold')
+
+# Rotate x labels for readability
+plt.xticks(rotation=45, ha='right')
+
+# Grid and spines
+ax.yaxis.grid(True, linestyle='--', alpha=0.3)
+for spine in ['top', 'right']:
+    ax.spines[spine].set_visible(False)
+
+# --- 4. Legend above the plot, outside the axes ---
+legend_handles = [
+    Patch(facecolor=box_face, edgecolor=box_edge, label='IQR (25th-75th percentile)'),
+    Line2D([0], [0], color=median_col, lw=2, label='Median'),
+    Line2D([0], [0], marker='o', color='w', markerfacecolor=mean_col,
+           markeredgecolor='black', markersize=7, label='Mean'),
+    Line2D([0], [0], marker='o', color='w', markerfacecolor=flier_face,
+           markeredgecolor=flier_edge, alpha=0.8, markersize=6, label='Outlier'),
+    Line2D([0], [0], color='black', lw=1.2, linestyle='--', label='Zero error reference')
+]
+
+ax.legend(
+    handles=legend_handles,
+    loc='lower center',
+    bbox_to_anchor=(0.5, 1.02),
+    ncol=3,
+    frameon=False
+)
+
+# Leave room at the top for the legend
+plt.tight_layout(rect=[0, 0, 1, 0.90])
+
+# Save the figure as SVG, creating the output directory first if needed
+os.makedirs(os.path.dirname(figure_save_path), exist_ok=True)
+plt.savefig(figure_save_path, format='svg', bbox_inches='tight')
+
+plt.show()
+##
+
+
 # %% test
 # Diagnose: what are the actual differences?
 print("\n🔍 Raw differences (first 5 rows per system):")