optimized results and new benchmark

adjustment to triton
adjusting the script with new paths
2026-05-29 00:42:40 +02:00 · 2026-05-19 10:21:24 +02:00 · 2026-05-19 10:13:29 +02:00 · 2026-05-19 10:03:52 +02:00 · 2026-05-19 09:23:31 +02:00 · 2026-05-16 16:50:33 +02:00
15 changed files with 22478 additions and 963 deletions
@@ -1,16 +1,88 @@
-# 1. Broad Ignores
-/Data/*
-/attach/*
-/results/*
-/enarcelona/*
-.env
+# =========================
+# Python
+# =========================
 __pycache__/
-*.pyc
+*.py[cod]
+*$py.class
+.ipynb_checkpoints/

-# 2. Ignore virtual environments COMPLETELY
-# This must come BEFORE the unignore rule
+# =========================
+# Virtual environments
+# =========================
+env/
 env*/
+venv/
+.venv/
+enarcelona/

-# 3. The "Unignore" rule (Whitelisting)
-# We only unignore .py files that aren't already blocked by the rules above
-!**/*.py
+# =========================
+# Secrets
+# =========================
+.env
+*.env
+
+# =========================
+# Patient data / sensitive data (ignore the contents of Data, not Data itself, so !Data/example/ below can apply)
+# =========================
+**/Data/*
+data/raw/
+data/processed/
+data/ground_truth/
+reference/
+
+# =========================
+# Generated results and logs
+# =========================
+results/
+results_edss_benchmark/
+*.log
+
+# =========================
+# Large/generated file types
+# =========================
+*.csv
+*.tsv
+*.json
+*.jsonl
+*.xlsx
+*.xls
+*.png
+*.PNG
+*.jpg
+*.jpeg
+*.svg
+*.pdf
+
+# =========================
+# Temporary / backup files
+# =========================
+*.tmp
+*.bak
+*.orig
+.DS_Store
+
+# =========================
+# Keep important code/config/docs (cannot re-include files inside an ignored directory)
+# =========================
+!README.md
+!requirements.txt
+!*.py
+!*.md
+!*.yml
+!*.yaml
+!*.toml
+
+# Keep prompt templates / schemas if safe to publish
+!prompts/
+!prompts/**
+!attach/
+!attach/*.gbnf
+!attach/just_edss_text.txt
+!attach/Komplett.txt
+
+# Keep example/synthetic data only
+!data/
+!data/example/
+!data/example/**
+!Data/example/
+!Data/example/**
@@ -1,739 +0,0 @@
-# %% Scatter
-import pandas as pd
-import matplotlib.pyplot as plt
-import numpy as np
-
-# Load your data from TSV file
-file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_results+MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.tsv'
-df = pd.read_csv(file_path, sep='\t')
-
-# Replace comma with dot for numeric conversion in GT_EDSS and LLM_Results
-df['GT_EDSS'] = df['GT_EDSS'].astype(str).str.replace(',', '.')
-df['LLM_Results'] = df['LLM_Results'].astype(str).str.replace(',', '.')
-
-# Convert to float (handle invalid entries gracefully)
-df['GT_EDSS'] = pd.to_numeric(df['GT_EDSS'], errors='coerce')
-df['LLM_Results'] = pd.to_numeric(df['LLM_Results'], errors='coerce')
-
-# Drop rows where either column is NaN
-df_clean = df.dropna(subset=['GT_EDSS', 'LLM_Results'])
-
-# Create scatter plot
-plt.figure(figsize=(8, 6))
-plt.scatter(df_clean['GT_EDSS'], df_clean['LLM_Results'], alpha=0.7, color='blue')
-
-# Add labels and title
-plt.xlabel('GT_EDSS')
-plt.ylabel('LLM_Results')
-plt.title('Comparison of GT_EDSS vs LLM_Results')
-
-# Optional: Add a diagonal line for reference (perfect prediction)
-plt.plot([0, max(df_clean['GT_EDSS'])], [0, max(df_clean['GT_EDSS'])], color='red', linestyle='--', label='Perfect Prediction')
-plt.legend()
-
-# Show plot
-plt.grid(True)
-plt.tight_layout()
-plt.show()
-
-##
-
-
-# %% Bland0-altman
-
-import pandas as pd
-import matplotlib.pyplot as plt
-import numpy as np
-import statsmodels.api as sm
-
-# Load your data from TSV file
-file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_results+MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.tsv'
-df = pd.read_csv(file_path, sep='\t')
-
-# Replace comma with dot for numeric conversion in GT_EDSS and LLM_Results
-df['GT_EDSS'] = df['GT_EDSS'].astype(str).str.replace(',', '.')
-df['LLM_Results'] = df['LLM_Results'].astype(str).str.replace(',', '.')
-
-# Convert to float (handle invalid entries gracefully)
-df['GT_EDSS'] = pd.to_numeric(df['GT_EDSS'], errors='coerce')
-df['LLM_Results'] = pd.to_numeric(df['LLM_Results'], errors='coerce')
-
-# Drop rows where either column is NaN
-df_clean = df.dropna(subset=['GT_EDSS', 'LLM_Results'])
-
-# Create Bland-Altman plot
-f, ax = plt.subplots(1, figsize=(8, 5))
-sm.graphics.mean_diff_plot(df_clean['GT_EDSS'], df_clean['LLM_Results'], ax=ax)
-
-# Add labels and title
-ax.set_title('Bland-Altman Plot: GT_EDSS vs LLM_Results')
-ax.set_xlabel('Mean of GT_EDSS and LLM_Results')
-ax.set_ylabel('Difference between GT_EDSS and LLM_Results')
-
-# Display Bland-Altman plot
-plt.tight_layout()
-plt.show()
-
-# Print some statistics
-mean_diff = np.mean(df_clean['GT_EDSS'] - df_clean['LLM_Results'])
-std_diff = np.std(df_clean['GT_EDSS'] - df_clean['LLM_Results'])
-print(f"Mean difference: {mean_diff:.3f}")
-print(f"Standard deviation of differences: {std_diff:.3f}")
-print(f"95% Limits of Agreement: [{mean_diff - 1.96*std_diff:.3f}, {mean_diff + 1.96*std_diff:.3f}]")
-
-##
-
-
-
-# %%  Confusion matrix
-import pandas as pd
-import matplotlib.pyplot as plt
-import numpy as np
-from sklearn.metrics import confusion_matrix, classification_report
-import seaborn as sns
-
-# Load your data from TSV file
-file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
-df = pd.read_csv(file_path, sep='\t')
-
-# Replace comma with dot for numeric conversion in GT.EDSS and result.EDSS
-df['GT.EDSS'] = df['GT.EDSS'].astype(str).str.replace(',', '.')
-df['result.EDSS'] = df['result.EDSS'].astype(str).str.replace(',', '.')
-
-# Convert to float (handle invalid entries gracefully)
-df['GT.EDSS'] = pd.to_numeric(df['GT.EDSS'], errors='coerce')
-df['result.EDSS'] = pd.to_numeric(df['result.EDSS'], errors='coerce')
-
-# Drop rows where either column is NaN
-df_clean = df.dropna(subset=['GT.EDSS', 'result.EDSS'])
-
-# For confusion matrix, we need to categorize the values
-# Let's create categories up to 10 (0-1, 1-2, 2-3, ..., 9-10)
-def categorize_edss(value):
-    if pd.isna(value):
-        return np.nan
-    elif value <= 1.0:
-        return '0-1'
-    elif value <= 2.0:
-        return '1-2'
-    elif value <= 3.0:
-        return '2-3'
-    elif value <= 4.0:
-        return '3-4'
-    elif value <= 5.0:
-        return '4-5'
-    elif value <= 6.0:
-        return '5-6'
-    elif value <= 7.0:
-        return '6-7'
-    elif value <= 8.0:
-        return '7-8'
-    elif value <= 9.0:
-        return '8-9'
-    elif value <= 10.0:
-        return '9-10'
-    else:
-        return '10+'
-
-# Create categorical versions
-df_clean['GT.EDSS_cat'] = df_clean['GT.EDSS'].apply(categorize_edss)
-df_clean['result.EDSS_cat'] = df_clean['result.EDSS'].apply(categorize_edss)
-
-# Remove any NaN categories
-df_clean = df_clean.dropna(subset=['GT.EDSS_cat', 'result.EDSS_cat'])
-
-# Create confusion matrix
-cm = confusion_matrix(df_clean['GT.EDSS_cat'], df_clean['result.EDSS_cat'],
-                     labels=['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10'])
-
-# Plot confusion matrix
-plt.figure(figsize=(10, 8))
-sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
-            xticklabels=['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10'],
-            yticklabels=['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10'])
-plt.title('Confusion Matrix: Ground truth EDSS vs interferred EDSS (Categorized 0-10)')
-plt.xlabel('LLM Generated EDSS')
-plt.ylabel('Ground Truth EDSS')
-plt.tight_layout()
-plt.show()
-
-# Print classification report
-print("Classification Report:")
-print(classification_report(df_clean['GT.EDSS_cat'], df_clean['result.EDSS_cat']))
-
-# Print raw counts
-print("\nConfusion Matrix (Raw Counts):")
-print(cm)
-
-##
-
-
-
-# %% Classification 
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-from sklearn.metrics import confusion_matrix
-import numpy as np
-
-# Load your data from TSV file
-file_path ='/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
-
-df = pd.read_csv(file_path, sep='\t')
-
-# Check data structure
-print("Data shape:", df.shape)
-print("First few rows:")
-print(df.head())
-print("\nColumn names:")
-for col in df.columns:
-    print(f"  {col}")
-
-# Function to safely convert to boolean
-def safe_bool_convert(series):
-    '''Safely convert series to boolean, handling various input formats'''
-    # Convert to string first, then to boolean
-    series_str = series.astype(str).str.strip().str.lower()
-
-    # Handle different true/false representations
-    bool_map = {
-        'true': True, '1': True, 'yes': True, 'y': True,
-        'false': False, '0': False, 'no': False, 'n': False
-    }
-
-    converted = series_str.map(bool_map)
-
-    # Handle remaining NaN values
-    converted = converted.fillna(False)  # or True, depending on your preference
-
-    return converted
-
-# Convert columns safely
-if 'result.klassifizierbar' in df.columns:
-    print("\nresult.klassifizierbar column info:")
-    print(df['result.klassifizierbar'].head(10))
-    print("Unique values:", df['result.klassifizierbar'].unique())
-
-    df['result.klassifizierbar'] = safe_bool_convert(df['result.klassifizierbar'])
-    print("After conversion:")
-    print(df['result.klassifizierbar'].value_counts())
-
-if 'GT.klassifizierbar' in df.columns:
-    print("\nGT.klassifizierbar column info:")
-    print(df['GT.klassifizierbar'].head(10))
-    print("Unique values:", df['GT.klassifizierbar'].unique())
-
-    df['GT.klassifizierbar'] = safe_bool_convert(df['GT.klassifizierbar'])
-    print("After conversion:")
-    print(df['GT.klassifizierbar'].value_counts())
-
-# Create bar chart showing only True values for klassifizierbar
-if 'result.klassifizierbar' in df.columns and 'GT.klassifizierbar' in df.columns:
-    # Get counts for True values only
-    llm_true_count = df['result.klassifizierbar'].sum()
-    gt_true_count = df['GT.klassifizierbar'].sum()
-
-    # Plot using matplotlib directly
-    fig, ax = plt.subplots(figsize=(8, 6))
-
-    x = np.arange(2)
-    width = 0.35
-
-    bars1 = ax.bar(x[0] - width/2, llm_true_count, width, label='LLM', color='skyblue', alpha=0.8)
-    bars2 = ax.bar(x[1] + width/2, gt_true_count, width, label='GT', color='lightcoral', alpha=0.8)
-
-    # Add value labels on bars
-    ax.annotate(f'{llm_true_count}',
-                xy=(x[0], llm_true_count),
-                xytext=(0, 3),
-                textcoords="offset points",
-                ha='center', va='bottom')
-
-    ax.annotate(f'{gt_true_count}',
-                xy=(x[1], gt_true_count),
-                xytext=(0, 3),
-                textcoords="offset points",
-                ha='center', va='bottom')
-
-    ax.set_xlabel('Classification Status (klassifizierbar)')
-    ax.set_ylabel('Count')
-    ax.set_title('True Values Comparison: LLM vs GT for "klassifizierbar"')
-    ax.set_xticks(x)
-    ax.set_xticklabels(['LLM', 'GT'])
-    ax.legend()
-
-    plt.tight_layout()
-    plt.show()
-
-# Create confusion matrix if both columns exist
-if 'result.klassifizierbar' in df.columns and 'GT.klassifizierbar' in df.columns:
-    try:
-        # Ensure both columns are boolean
-        llm_bool = df['result.klassifizierbar'].fillna(False).astype(bool)
-        gt_bool = df['GT.klassifizierbar'].fillna(False).astype(bool)
-
-        cm = confusion_matrix(gt_bool, llm_bool)
-
-        # Plot confusion matrix
-        fig, ax = plt.subplots(figsize=(8, 6))
-        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
-                   xticklabels=['False ', 'True '],
-                   yticklabels=['False', 'True '],
-                   ax=ax)
-        ax.set_xlabel('LLM Predictions ')
-        ax.set_ylabel('GT Labels ')
-        ax.set_title('Confusion Matrix: LLM vs GT for "klassifizierbar"')
-
-        plt.tight_layout()
-        plt.show()
-
-        print("Confusion Matrix:")
-        print(cm)
-
-    except Exception as e:
-        print(f"Error creating confusion matrix: {e}")
-
-# Show final data info
-print("\nFinal DataFrame info:")
-print(df[['result.klassifizierbar', 'GT.klassifizierbar']].info())
-
-##
-
-
-
-
-# %% Boxplot
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-import numpy as np
-
-# Load your data from TSV file
-file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_results_unique.tsv'
-df = pd.read_csv(file_path, sep='\t')
-
-# Replace comma with dot for numeric conversion in GT.EDSS and result.EDSS
-df['GT.EDSS'] = df['GT.EDSS'].astype(str).str.replace(',', '.')
-df['result.EDSS'] = df['result.EDSS'].astype(str).str.replace(',', '.')
-
-# Convert to float (handle invalid entries gracefully)
-df['GT.EDSS'] = pd.to_numeric(df['GT.EDSS'], errors='coerce')
-df['result.EDSS'] = pd.to_numeric(df['result.EDSS'], errors='coerce')
-
-# Drop rows where either column is NaN
-df_clean = df.dropna(subset=['GT.EDSS', 'result.EDSS'])
-
-# 1. DEFINE CATEGORY ORDER
-# This ensures the X-axis is numerically logical (0-1 comes before 1-2)
-category_order = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10', '10+']
-
-# Convert the column to a Categorical type with the specific order
-df_clean['GT.EDSS_cat'] = pd.Categorical(df_clean['GT.EDSS'].apply(categorize_edss), 
-                                         categories=category_order, 
-                                         ordered=True)
-
-plt.figure(figsize=(14, 8))
-
-# 2. ADD HUE FOR LEGEND
-# Assigning x to 'hue' allows Seaborn to generate a legend automatically
-box_plot = sns.boxplot(
-    data=df_clean, 
-    x='GT.EDSS_cat', 
-    y='result.EDSS',
-    hue='GT.EDSS_cat',  # Added hue
-    palette='viridis', 
-    linewidth=1.5,
-    legend=True         # Ensure legend is enabled
-)
-
-# 3. CUSTOMIZE PLOT
-plt.title('Distribution of result.EDSS by GT.EDSS Category', fontsize=18, pad=20)
-plt.xlabel('Ground Truth EDSS Category', fontsize=14)
-plt.ylabel('LLM Predicted EDSS', fontsize=14)
-
-# Move legend to the side or top
-plt.legend(title="EDSS Categories", bbox_to_anchor=(1.05, 1), loc='upper left')
-
-plt.xticks(rotation=45, ha='right', fontsize=10)
-plt.grid(True, axis='y', alpha=0.3)
-plt.tight_layout()
-
-plt.show()
-##
-
-
-# %% Postproccessing Column names
-
-import pandas as pd
-
-# Read the TSV file
-file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
-df = pd.read_csv(file_path, sep='\t')
-
-# Create a mapping dictionary for German to English column names
-column_mapping = {
-    'EDSS':'GT.EDSS',
-    'klassifizierbar': 'GT.klassifizierbar',
-    'Sehvermögen': 'GT.VISUAL_OPTIC_FUNCTIONS',
-    'Cerebellum': 'GT.CEREBELLAR_FUNCTIONS',
-    'Hirnstamm': 'GT.BRAINSTEM_FUNCTIONS',
-    'Sensibiliät': 'GT.SENSORY_FUNCTIONS',
-    'Pyramidalmotorik': 'GT.PYRAMIDAL_FUNCTIONS',
-    'Ambulation': 'GT.AMBULATION',
-    'Cerebrale_Funktion': 'GT.CEREBRAL_FUNCTIONS',
-    'Blasen-_und_Mastdarmfunktion': 'GT.BOWEL_AND_BLADDER_FUNCTIONS'
-}
-
-# Rename columns
-df = df.rename(columns=column_mapping)
-
-# Save the modified dataframe back to TSV file
-df.to_csv(file_path, sep='\t', index=False)
-
-print("Columns have been successfully renamed!")
-print("Renamed columns:")
-for old_name, new_name in column_mapping.items():
-    if old_name in df.columns:
-        print(f"  {old_name} -> {new_name}")
-
-
-##
-
-
-
-
-# %% Styled table
-import pandas as pd
-import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
-import dataframe_image as dfi
-# Load data
-df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
-
-# 1. Identify all GT and result columns
-gt_columns = [col for col in df.columns if col.startswith('GT.')]
-result_columns = [col for col in df.columns if col.startswith('result.')]
-
-print("GT Columns found:", gt_columns)
-print("Result Columns found:", result_columns)
-
-# 2. Create proper mapping between GT and result columns
-# Handle various naming conventions (spaces, underscores, etc.)
-column_mapping = {}
-
-for gt_col in gt_columns:
-    base_name = gt_col.replace('GT.', '')
-
-    # Clean the base name for matching - remove spaces, underscores, etc.
-    # Try different matching approaches
-    candidates = [
-        f'result.{base_name}',  # Exact match
-        f'result.{base_name.replace(" ", "_")}',  # With underscores
-        f'result.{base_name.replace("_", " ")}',  # With spaces
-        f'result.{base_name.replace(" ", "")}',   # No spaces
-        f'result.{base_name.replace("_", "")}'    # No underscores
-    ]
-
-    # Also try case-insensitive matching
-    candidates.append(f'result.{base_name.lower()}')
-    candidates.append(f'result.{base_name.upper()}')
-
-    # Try to find matching result column
-    matched = False
-    for candidate in candidates:
-        if candidate in result_columns:
-            column_mapping[gt_col] = candidate
-            matched = True
-            break
-
-    # If no exact match found, try partial matching
-    if not matched:
-        # Try to match by removing special characters and comparing
-        base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' '])
-        for result_col in result_columns:
-            result_base = result_col.replace('result.', '')
-            result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' '])
-            if base_clean.lower() == result_clean.lower():
-                column_mapping[gt_col] = result_col
-                matched = True
-                break
-
-print("Column mapping:", column_mapping)
-
-# 3. Faster, vectorized computation using the corrected mapping
-data_list = []
-
-for gt_col, result_col in column_mapping.items():
-    print(f"Processing {gt_col} vs {result_col}")
-
-    # Convert to numeric, forcing errors to NaN
-    s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
-    s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)
-
-    # Calculate matches (abs difference <= 0.5)
-    diff = np.abs(s1 - s2)
-    matches = (diff <= 0.5).sum()
-
-    # Determine the denominator (total valid comparisons)
-    valid_count = diff.notna().sum()
-
-    if valid_count > 0:
-        percentage = (matches / valid_count) * 100
-    else:
-        percentage = 0
-
-    # Extract clean base name for display
-    base_name = gt_col.replace('GT.', '')
-
-    data_list.append({
-        'GT': base_name,
-        'Match %': round(percentage, 1)
-    })
-
-
-
-
-# 4. Prepare Data
-match_df = pd.DataFrame(data_list)
-# Clean up labels: Replace underscores with spaces and capitalize
-match_df['GT'] = match_df['GT'].str.replace('_', ' ').str.title()
-match_df = match_df.sort_values('Match %', ascending=False)
-
-# 5. Create a "Beautiful" Table using Seaborn Heatmap
-def create_luxury_table(df, output_file="edss_agreement.png"):
-    # Set the aesthetic style
-    sns.set_theme(style="white", font="sans-serif")
-
-    # Prepare data for heatmap
-    plot_data = df.set_index('GT')[['Match %']]
-
-    # Initialize the figure
-    # Height is dynamic based on number of rows
-    fig, ax = plt.subplots(figsize=(8, len(df) * 0.6))
-
-    # Create a custom diverging color map (Deep Red -> Mustard -> Emerald)
-    # This looks more professional than standard 'RdYlGn'
-    cmap = sns.diverging_palette(15, 135, s=80, l=55, as_cmap=True)
-
-    # Draw the heatmap
-    sns.heatmap(
-        plot_data,
-        annot=True,
-        fmt=".1f",
-        cmap=cmap,
-        center=85,      # Centers the color transition
-        vmin=50, vmax=100, # Range of the gradient
-        linewidths=2,
-        linecolor='white',
-        cbar=False,     # Remove color bar for a "table" look
-        annot_kws={"size": 14, "weight": "bold", "family": "sans-serif"}
-    )
-
-    # Styling the Axes (Turning the heatmap into a table)
-    ax.set_xlabel("")
-    ax.set_ylabel("")
-    ax.xaxis.tick_top() # Move "Match %" label to top
-    ax.set_xticklabels(['Agreement (%)'], fontsize=14, fontweight='bold', color='#2c3e50')
-    ax.tick_params(axis='y', labelsize=12, labelcolor='#2c3e50', length=0)
-
-    # Add a thin border around the plot
-    for _, spine in ax.spines.items():
-        spine.set_visible(True)
-        spine.set_color('#ecf0f1')
-
-    plt.title('EDSS Subcategory Consistency Analysis', fontsize=16, pad=40, fontweight='bold', color='#2c3e50')
-
-    # Add a subtle footer
-    plt.figtext(0.5, 0.0, "Tolerance: ±0.5 points",
-                wrap=True, horizontalalignment='center', fontsize=10, color='gray', style='italic')
-
-    # Save with high resolution
-    plt.tight_layout()
-    plt.savefig(output_file, dpi=300, bbox_inches='tight')
-    print(f"Beautiful table saved as {output_file}")
-
-# Execute
-create_luxury_table(match_df)
-
-
-# Run the function
-save_styled_table(match_df)
-# 6. Save as SVG
-
-plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
-print("Successfully saved agreement_table.svg")
-
-# Show plot if running in a GUI environment
-plt.show()
-##
-
-
-
-# %% Time Plot
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-from scipy import stats
-
-# Load the TSV file
-file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
-df = pd.read_csv(file_path, sep='\t')
-
-# Extract the inference_time_sec column
-inference_times = df['inference_time_sec'].dropna()  # Remove NaN values
-
-# Calculate statistics
-mean_time = inference_times.mean()
-std_time = inference_times.std()
-median_time = np.median(inference_times)
-
-# Create the histogram
-fig, ax = plt.subplots(figsize=(10, 6))
-
-# Create histogram with bins of 1 second width
-min_time = int(inference_times.min())
-max_time = int(inference_times.max()) + 1
-bins = np.arange(min_time, max_time + 1, 1)  # Bins of 1 second width
-
-# Create histogram with counts (not probability density)
-n, bins, patches = ax.hist(inference_times, bins=bins, color='lightblue', alpha=0.7, edgecolor='black', linewidth=0.5)
-
-# Generate Gaussian curve for fit
-x = np.linspace(inference_times.min(), inference_times.max(), 100)
-# Scale Gaussian to match histogram counts
-gaussian_counts = stats.norm.pdf(x, mean_time, std_time) * len(inference_times) * (bins[1] - bins[0])
-
-# Plot Gaussian fit
-ax.plot(x, gaussian_counts, color='red', linewidth=2, label=f'Gaussian Fit (μ={mean_time:.1f}s, σ={std_time:.1f}s)')
-
-# Add vertical lines for mean and median
-ax.axvline(mean_time, color='blue', linestyle='--', linewidth=2, label=f'Mean = {mean_time:.1f}s')
-ax.axvline(median_time, color='green', linestyle='--', linewidth=2, label=f'Median = {median_time:.1f}s')
-
-# Add standard deviation as vertical lines
-ax.axvline(mean_time + std_time, color='saddlebrown', linestyle=':', linewidth=1, alpha=0.7, label=f'+1σ = {mean_time + std_time:.1f}s')
-ax.axvline(mean_time - std_time, color='saddlebrown', linestyle=':', linewidth=1, alpha=0.7, label=f'-1σ = {mean_time - std_time:.1f}s')
-
-ax.set_xlabel('Inference Time (seconds)')
-ax.set_ylabel('Frequency')
-ax.set_title('Inference Time Distribution with Gaussian Fit')
-ax.legend()
-ax.grid(True, alpha=0.3)
-
-plt.tight_layout()
-plt.show()
-
-##
-
-
-
-
-
-
-# %% Dashboard 
-import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
-from datetime import datetime
-import numpy as np
-
-# Load the data
-file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
-df = pd.read_csv(file_path, sep='\t')
-
-# Rename columns to remove 'result.' prefix and handle spaces
-column_mapping = {}
-for col in df.columns:
-    if col.startswith('result.'):
-        new_name = col.replace('result.', '')
-        # Handle spaces in column names (replace with underscores if needed)
-        new_name = new_name.replace(' ', '_')
-        column_mapping[col] = new_name
-df = df.rename(columns=column_mapping)
-
-# Convert MedDatum to datetime
-df['MedDatum'] = pd.to_datetime(df['MedDatum'])
-
-# Check what columns actually exist in the dataset
-print("Available columns:")
-print(df.columns.tolist())
-print("\nFirst few rows:")
-print(df.head())
-
-# Hardcode specific patient names
-patient_names = ['6ccda8c6']
-
-# Define the functional systems (columns to plot) - adjust based on actual column names
-functional_systems = ['EDSS', 'Visual', 'Sensory', 'Motor', 'Brainstem', 'Cerebellar', 'Autonomic', 'Bladder', 'Intellectual']
-
-# Create subplots horizontally (2 columns, adjust rows as needed)
-num_plots = len(functional_systems)
-num_cols = 2
-num_rows = (num_plots + num_cols - 1) // num_cols  # Ceiling division
-
-fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 4*num_rows), sharex=True)
-if num_plots == 1:
-    axes = [axes]
-elif num_rows == 1:
-    axes = axes
-else:
-    axes = axes.flatten()
-
-# Plot for the hardcoded patient
-for i, system in enumerate(functional_systems):
-    # Filter data for this specific patient
-    patient_data = df[df['unique_id'] == patient_names[0]].sort_values('MedDatum')
-
-    # Check if patient data exists
-    if patient_data.empty:
-        print(f"No data found for patient: {patient_names[0]}")
-        continue
-
-    # Check if the system column exists in the data
-    if system in patient_data.columns:
-        # Plot the specific functional system
-        if not patient_data[system].isna().all():
-            axes[i].plot(patient_data['MedDatum'], patient_data[system], marker='o', linewidth=2, label=system)
-            axes[i].set_ylabel('Score')
-            axes[i].set_title(f'Functional System: {system}')
-            axes[i].grid(True, alpha=0.3)
-            axes[i].legend()
-        else:
-            axes[i].set_title(f'Functional System: {system} (No data)')
-            axes[i].set_ylabel('Score')
-            axes[i].grid(True, alpha=0.3)
-    else:
-        # Try to find column with similar name (case insensitive)
-        found_column = None
-        for col in df.columns:
-            if system.lower() in col.lower():
-                found_column = col
-                break
-
-        if found_column:
-            print(f"Found similar column: {found_column}")
-            if not patient_data[found_column].isna().all():
-                axes[i].plot(patient_data['MedDatum'], patient_data[found_column], marker='o', linewidth=2, label=found_column)
-                axes[i].set_ylabel('Score')
-                axes[i].set_title(f'Functional System: {system} (found as: {found_column})')
-                axes[i].grid(True, alpha=0.3)
-                axes[i].legend()
-        else:
-            axes[i].set_title(f'Functional System: {system} (Column not found)')
-            axes[i].set_ylabel('Score')
-            axes[i].grid(True, alpha=0.3)
-
-# Hide empty subplots
-for i in range(len(functional_systems), len(axes)):
-    axes[i].set_visible(False)
-
-# Set x-axis label for the last row only
-for i in range(len(functional_systems)):
-    if i >= len(axes) - num_cols:  # Last row
-        axes[i].set_xlabel('Date')
-
-plt.tight_layout()
-plt.show()
-
-##
@@ -1,135 +0,0 @@
-import pandas as pd
-import numpy as np
-import seaborn as sns
-import matplotlib.pyplot as plt
-import dataframe_image as dfi
-# Load data
-df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
-
-# 1. Identify all GT and result columns
-gt_columns = [col for col in df.columns if col.startswith('GT.')]
-result_columns = [col for col in df.columns if col.startswith('result.')]
-
-print("GT Columns found:", gt_columns)
-print("Result Columns found:", result_columns)
-
-# 2. Create proper mapping between GT and result columns
-# Handle various naming conventions (spaces, underscores, etc.)
-column_mapping = {}
-
-for gt_col in gt_columns:
-    base_name = gt_col.replace('GT.', '')
-
-    # Clean the base name for matching - remove spaces, underscores, etc.
-    # Try different matching approaches
-    candidates = [
-        f'result.{base_name}',  # Exact match
-        f'result.{base_name.replace(" ", "_")}',  # With underscores
-        f'result.{base_name.replace("_", " ")}',  # With spaces
-        f'result.{base_name.replace(" ", "")}',   # No spaces
-        f'result.{base_name.replace("_", "")}'    # No underscores
-    ]
-
-    # Also try case-insensitive matching
-    candidates.append(f'result.{base_name.lower()}')
-    candidates.append(f'result.{base_name.upper()}')
-
-    # Try to find matching result column
-    matched = False
-    for candidate in candidates:
-        if candidate in result_columns:
-            column_mapping[gt_col] = candidate
-            matched = True
-            break
-
-    # If no exact match found, try partial matching
-    if not matched:
-        # Try to match by removing special characters and comparing
-        base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' '])
-        for result_col in result_columns:
-            result_base = result_col.replace('result.', '')
-            result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' '])
-            if base_clean.lower() == result_clean.lower():
-                column_mapping[gt_col] = result_col
-                matched = True
-                break
-
-print("Column mapping:", column_mapping)
-
-# 3. Faster, vectorized computation using the corrected mapping
-data_list = []
-
-for gt_col, result_col in column_mapping.items():
-    print(f"Processing {gt_col} vs {result_col}")
-
-    # Convert to numeric, forcing errors to NaN
-    s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
-    s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)
-
-    # Calculate matches (abs difference <= 0.5)
-    diff = np.abs(s1 - s2)
-    matches = (diff <= 0.5).sum()
-
-    # Determine the denominator (total valid comparisons)
-    valid_count = diff.notna().sum()
-
-    if valid_count > 0:
-        percentage = (matches / valid_count) * 100
-    else:
-        percentage = 0
-
-    # Extract clean base name for display
-    base_name = gt_col.replace('GT.', '')
-
-    data_list.append({
-        'GT': base_name,
-        'Match %': round(percentage, 1)
-    })
-
-
-
-# 4. Prepare Data for Plotting
-match_df = pd.DataFrame(data_list)
-match_df = match_df.sort_values('Match %', ascending=False) # Sort for better visual flow
-
-# 5. Create the Styled Gradient Table
-def style_agreement_table(df):
-    return (df.style
-        .format({'Match %': '{:.1f}%'}) # Add % sign
-        .background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient
-        .set_properties(**{
-            'text-align': 'center',
-            'font-size': '12pt',
-            'border-collapse': 'collapse',
-            'border': '1px solid #D3D3D3'
-        })
-        .set_table_styles([
-            # Style the header
-            {'selector': 'th', 'props': [
-                ('background-color', '#404040'), 
-                ('color', 'white'),
-                ('font-weight', 'bold'),
-                ('text-transform', 'uppercase'),
-                ('padding', '10px')
-            ]},
-            # Add hover effect
-            {'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]}
-        ])
-        .set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)")
-    )
-
-# To display in a Jupyter Notebook:
-styled_table = style_agreement_table(match_df)
-styled_table
-
-dfi.export(styled_table, "styled_table.png")
-#styled_table.to_html("agreement_report.html")
-# 6. Save as SVG
-
-#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
-#print("Successfully saved agreement_table.svg")
-
-# Show plot if running in a GUI environment
-plt.show()
-
-
@@ -1,74 +0,0 @@
-import pandas as pd
-import numpy as np
-import seaborn as sns
-
-# Sample data (replace with your actual df)
-# NOTE(review): absolute, machine-specific path; the script only runs where this file exists.
-df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
-
-# Identify GT and Result columns by their name prefix
-gt_columns = [col for col in df.columns if col.startswith('GT.')]
-result_columns = [col for col in df.columns if col.startswith('result.')]
-
-# Create mapping: 'GT.<name>' -> 'result.<name>', only for pairs where the result column exists
-column_mapping = {}
-for gt_col in gt_columns:
-    # NOTE(review): str.replace removes every 'GT.' occurrence, not only the leading prefix.
-    base_name = gt_col.replace('GT.', '')
-    result_col = f'result.{base_name}'
-    if result_col in result_columns:
-        column_mapping[gt_col] = result_col
-
-# Function to compute match percentage for each GT-Result pair
-def compute_match_percentages(df, column_mapping):
-    """Compute, per GT/result column pair, the share of rows agreeing within 0.5.
-
-    Args:
-        df: DataFrame holding every column named in ``column_mapping``.
-        column_mapping: dict mapping a ground-truth column name to the name
-            of its result column.
-
-    Returns:
-        DataFrame with one row per pair and the columns 'GT_Column',
-        'Result_Column' and 'Match_Percentage' (rounded to one decimal).
-
-    NOTE(review): the denominator is ``len(df)``, so rows skipped below
-    (NaN or non-numeric values) still count as non-matches, and an empty
-    ``df`` with a non-empty mapping raises ZeroDivisionError.
-    """
-    percentages = []
-    for gt_col, result_col in column_mapping.items():
-        count = 0
-        # Denominator is all rows, not only the rows that could be compared.
-        total = len(df)
-
-        for _, row in df.iterrows():
-            gt_val = row[gt_col]
-            result_val = row[result_col]
-
-            # Handle NaN values: the row is skipped but stays in `total`
-            if pd.isna(gt_val) or pd.isna(result_val):
-                continue
-
-            # Handle non-numeric values
-            try:
-                gt_float = float(gt_val)
-                result_float = float(result_val)
-            except (ValueError, TypeError):
-                # Skip rows with non-numeric values (also still counted in `total`)
-                continue
-
-            # Check if values are within 0.5 tolerance (inclusive)
-            if abs(gt_float - result_float) <= 0.5:
-                count += 1
-
-        percentage = (count / total) * 100
-        percentages.append({
-            'GT_Column': gt_col,
-            'Result_Column': result_col,
-            'Match_Percentage': round(percentage, 1)
-        })
-
-    return pd.DataFrame(percentages)
-
-# Compute match percentages
-match_df = compute_match_percentages(df, column_mapping)
-
-# Create a pivot table for gradient display (optional but helpful)
-# Rows are GT columns, columns are result columns; combinations absent from match_df are filled with 0.
-pivot_table = match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage'].unstack(fill_value=0)
-
-# Apply gradient background (axis=None: one colour scale across the whole table)
-cm = sns.light_palette("green", as_cmap=True)
-styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None)
-
-# Display result
-print("Agreement Percentage Table (with gradient):")
-# NOTE(review): a bare expression only renders in a notebook/REPL cell; it has no effect when run as a script.
-styled_table
-
-
-
-# Save the styled table to a file (written relative to the current working directory)
-styled_table.to_html("agreement_report.html")
-print("Report saved to agreement_report.html")
@@ -0,0 +1,31 @@
+# Project Structure
+
+This project was reorganized into:
+
+- `data/`
+  - `raw/`: original raw data, if retained locally
+  - `processed/`: cleaned or derived input data
+  - `ground_truth/`: manually annotated reference data
+  - `external/`: externally provided data
+
+- `prompts/`
+  - EDSS instructions and prompt/schema assets
+
+- `scripts/`
+  - runnable analysis and plotting scripts
+
+- `results/`
+  - `benchmark_runs/`: full model benchmark runs
+  - `final_results/`: final selected model outputs
+  - `figures/`: generated figures
+  - `tables/`: generated tables
+  - `logs/`: terminal logs
+
+- `manuscript/`
+  - final figures and tables for paper/thesis writing
+
+- `archive/`
+  - old scripts, old results, temporary files, and unclear legacy files
+
+Important:
+The reorganization was performed after creating a full timestamped backup.
@@ -216,6 +216,3 @@ if __name__ == "__main__":



-# %% name
-eXXXXXXXX
-##
@@ -0,0 +1,481 @@
+1 VISUAL OPTIC FUNCTIONS
+
+VISUAL ACUITY
+The visual acuity score is based on the line in the Snellen chart at 20 feet (5 meters)
+for which the patient makes no more than one error, using best available correction.
+Alternatively, best corrected near vision can be assessed, but this should be noted and
+consistently performed during follow-up examinations. Switching from near to distance
+visual acuity measurements should be avoided in follow-up examinations.
+
+VISUAL FIELDS
+0 normal
+1 signs only: deficits present only on formal (confrontational) testing
+2 moderate: patient aware of deficit, but incomplete hemianopsia on examination
+3 marked: complete homonymous hemianopsia or equivalent
+
+SCOTOMA
+0 none
+1 small: detectable only on formal (confrontational) testing
+2 large: spontaneously reported by patient
+
+* DISC PALLOR
+0 not present
+1 present
+
+NOTE
+When determining the EDSS step, the Visual FS score must be converted to a lower
+score as follows:
+Visual FS Score:           6 5 4 3 2 1
+Converted Visual FS Score: 4 3 3 2 2 1
+
+FUNCTIONAL SYSTEM SCORE
+0 normal
+1 disc pallor and/or small scotoma and/or visual acuity (corrected) of worse eye less than 20/20 (1.0) but better than 20/30 (0.67)
+2 worse eye with maximal visual acuity (corrected) of 20/30 to 20/59 (0.67 – 0.34)
+3 worse eye with large scotoma and/or moderate decrease in fields and/or maximal visual acuity (corrected) of 20/60 to 20/99 (0.33 – 0.21)
+4 worse eye with marked decrease of fields and/or maximal visual acuity (corrected) of 20/100 to 20/200 (0.2 – 0.1); grade 3 plus maximal acuity of better eye of 20/60 (0.33) or less
+5 worse eye with maximal visual acuity (corrected) less than 20/200 (0.1); grade 4 plus maximal acuity of better eye of 20/60 (0.33) or less
+6 grade 5 plus maximal acuity of better eye of 20/60 (0.33) or less
+
+* = optional part of the examination
+
+
+### BRAINSTEM FUNCTIONS
+
+**DYSARTHRIA**
+- **0**: None
+- **1**: Signs only
+- **2**: Mild: Clinically detectable, patient is aware
+- **3**: Moderate: Obvious during conversation, impairs comprehension
+- **4**: Marked: Incomprehensible speech
+- **5**: Inability to speak
+
+**DYSPHAGIA**
+- **0**: None
+- **1**: Signs only
+- **2**: Mild: Difficulty with thin liquids
+- **3**: Moderate: Difficulty with liquids and solid food
+- **4**: Marked: Sustained difficulty, requires pureed diet
+- **5**: Inability to swallow
+
+**OTHER CRANIAL NERVE FUNCTIONS**
+- **0**: Normal
+- **1**: Signs only
+- **2**: Mild disability: Clinically detectable deficit, patient is usually aware
+- **3**: Moderate disability
+- **4**: Marked disability
+
+**EXTRAOCULAR MOVEMENTS (EOM) IMPAIRMENT**
+- **0**: None
+- **1**: Signs only: Subtle EOM weakness, no complaints of vision issues
+- **2**: Mild: Subtle EOM weakness or obvious incomplete paralysis not noticed by patient
+- **3**: Moderate: Obvious incomplete paralysis noticed by patient or complete loss in one direction
+- **4**: Marked: Complete loss in more than one direction
+
+**NYSTAGMUS**
+- **0**: None
+- **1**: Signs only or mild: Gaze-evoked nystagmus below moderate limits (equivalent to Brainstem FS score of 1)
+- **2**: Moderate: Sustained nystagmus on horizontal/vertical gaze at 30 degrees, patient may not notice
+- **3**: Severe: Nystagmus in primary position or coarse persistent nystagmus interfering with vision; complete internuclear ophthalmoplegia; oscillopsia
+
+**TRIGEMINAL DAMAGE**
+- **0**: None
+- **1**: Signs only
+- **2**: Mild: Clinically detectable numbness, patient is aware
+- **3**: Moderate: Impaired sharp/dull discrimination in one to three branches or trigeminal neuralgia (at least one recent attack)
+- **4**: Marked: Unable to discriminate between sharp/dull or complete loss of sensation in one or both nerves
+
+**FACIAL WEAKNESS**
+- **0**: None
+- **1**: Signs only
+- **2**: Mild: Clinically detectable weakness, patient is aware
+- **3**: Moderate: Incomplete facial palsy (e.g., eye closure requires patching, drooling)
+- **4**: Marked: Complete unilateral or bilateral facial palsy with lagophthalmus or difficulty with liquids
+
+**HEARING LOSS**
+- **0**: None
+- **1**: Signs only: Hears finger rub less on one/both sides, lateralized Weber test but no complaints
+- **2**: Mild: As in 1, aware of hearing problem
+- **3**: Moderate: Does not hear finger rub on one/both sides, misses several whispered numbers
+- **4**: Marked: Misses all or nearly all whispered numbers
+
+**FUNCTIONAL SYSTEM SCORE**
+- **0**: Normal
+- **1**: Signs only
+- **2**: Moderate nystagmus/EOM impairment/other mild disability
+- **3**: Severe nystagmus/marked EOM impairment/moderate other cranial nerve disability
+- **4**: Marked dysarthria/other marked disability
+- **5**: Inability to swallow or speak
+
+### PYRAMIDAL FUNCTIONS
+
+#### REFLEXES
+- **0**: Absent
+- **1**: Diminished
+- **2**: Normal
+- **3**: Exaggerated
+- **4**: Nonsustained clonus (a few beats of clonus)
+- **5**: Sustained clonus
+
+##### Cutaneous Reflexes
+- **0**: Normal
+- **1**: Weak
+- **2**: Absent
+
+###### Palmomental Reflex
+- **0**: Absent
+- **1**: Present
+
+###### Plantar Response
+- **0**: Flexor
+- **1**: Neutral or equivocal
+- **2**: Extensor
+
+#### LIMB STRENGTH
+The weakest muscle in each group defines the score for that muscle group. Optional functional tests (hopping on one foot and walking on heels/toes) are recommended for BMRC grades 3–5.
+
+##### BMRC Rating Scale
+- **0**: No muscle contraction detected
+- **1**: Visible contraction without visible joint movement
+- **2**: Visible movement only on the plane of gravity
+- **3**: Active movement against gravity, but not against resistance
+- **4**: Active movement against resistance, but not full strength
+- **5**: Normal strength
+
+#### FUNCTIONAL TESTS
+##### Pronator Drift (Upper Extremities)
+Pronation and downward drift:
+- **0**: None
+- **1**: Mild
+- **2**: Evident
+
+##### Position Test (Lower Extremities)
+Ask patient to lift both legs together, with legs fully extended at the knee. Sinking:
+- **0**: None
+- **1**: Mild
+- **2**: Evident
+- **3**: Able to lift only one leg at a time (grade from the horizontal position at the hip joints in degrees)
+- **4**: Unable to lift one leg at a time
+
+##### Walking on Heels/Toes
+- **0**: Normal
+- **1**: Impaired
+- **2**: Not possible
+
+##### Hopping on One Foot
+- **0**: Normal
+- **1**: 6–10 times
+- **2**: 1–5 times
+- **3**: Not possible
+
+#### LIMB SPASTICITY (AFTER RAPID FLEXION OF THE EXTREMITY)
+- **0**: None
+- **1**: Mild: barely increased muscle tone
+- **2**: Moderate: moderately increased muscle tone that can be overcome; full range of motion is possible
+- **3**: Severe: severely increased muscle tone that is extremely difficult to overcome; full range of motion is not possible
+- **4**: Contracted
+
+#### GAIT SPASTICITY
+- **0**: None
+- **1**: Barely perceptible
+- **2**: Evident: minor interference with function
+- **3**: Permanent shuffling: major interference with function
+
+#### OVERALL MOTOR PERFORMANCE
+- **0**: Normal
+- **1**: Abnormal weakness (as compared to peers) in performing more demanding tasks, e.g., walking longer distances; no reduction in limb strength on formal testing
+- **2**: Reduction in strength of individual muscle groups at confrontational testing
+
+#### FUNCTIONAL SYSTEM SCORE
+- **0**: Normal
+- **1**: Abnormal signs without disability
+- **2**: Minimal disability: patient complains of motor-fatigability or reduced performance in strenuous motor tasks (motor performance grade 1) and/or BMRC grade 4 in one or two muscle groups
+- **3**: Mild to moderate paraparesis or hemiparesis: usually BMRC grade 4 in more than two muscle groups; and/or BMRC grade 3 in one or two muscle groups (movements against gravity
+are possible); and/or severe monoparesis: BMRC grade 2 or less in one muscle group
+- **4**: Marked paraparesis or hemiparesis: usually BMRC grade 2 in two limbs or monoplegia with BMRC grade 0 or 1 in one limb; and/or moderate tetraparesis: BMRC grade 3 in three or more limbs
+- **5**: Paraplegia: BMRC grade 0 or 1 in all muscle groups of the lower limbs; and/or marked tetraparesis: BMRC grade 2 or less in three or more limbs; and/or hemiplegia
+- **6**: Tetraplegia: BMRC grade 0 or 1 in all muscle groups of the upper and lower limbs
+
+### CEREBELLAR FUNCTIONS
+
+#### HEAD TREMOR
+- **0**: none
+- **1**: mild
+- **2**: moderate
+- **3**: severe
+
+#### TRUNCAL ATAXIA
+- **0**: none
+- **1**: signs only
+- **2**: mild (swaying with eyes closed)
+- **3**: moderate (swaying with eyes open)
+- **4**: severe (unable to sit without assistance)
+
+#### LIMB ATAXIA (TREMOR / DYSMETRIA AND RAPID ALTERNATING MOVEMENTS)
+- **0**: none
+- **1**: signs only
+- **2**: mild (tremor or clumsy movements easily seen, minor interference with function)
+- **3**: moderate (tremor or clumsy movements interfere with function in all spheres)
+- **4**: severe (most functions are very difficult)
+
+#### TANDEM (STRAIGHT LINE) WALKING
+- **0**: normal
+- **1**: impaired
+- **2**: not possible
+
+#### GAIT ATAXIA
+- **0**: none
+- **1**: signs only
+- **2**: mild (problems with balance realized by patient and/or significant other)
+- **3**: moderate (abnormal balance with ordinary walking)
+- **4**: severe (unable to walk more than a few steps unassisted or requires a walking aid or assistance due to ataxia)
+
+#### ROMBERG TEST
+- **0**: normal
+- **1**: mild (mild instability with eyes closed)
+- **2**: moderate (not stable with eyes closed)
+- **3**: severe (not stable with eyes open)
+
+#### OTHER CEREBELLAR TESTS
+- **0**: normal
+- **1**: mild abnormality
+- **2**: moderate abnormality
+- **3**: severe abnormality
+
+**NOTE:**
+- The presence of severe gait and/or truncal ataxia alone (without severe ataxia in three or four limbs) results in a Cerebellar FS score of 3.
+- If weakness or sensory deficits interfere with the testing of ataxia, score the patient’s actual performance. Indicate the possible role of weakness by marking an "X" after the
+affected subsystems and Cerebellar FS score.
+
+#### FUNCTIONAL SYSTEM SCORE
+- **0**: normal
+- **1**: abnormal signs without disability
+- **2**: mild ataxia and/or moderate station ataxia (Romberg) and/or tandem walking not possible
+- **3**: moderate limb ataxia and/or moderate or severe gait/truncal ataxia
+- **4**: severe gait/truncal ataxia and severe ataxia in three or four limbs
+- **5**: unable to perform coordinated movements due to ataxia
+- **X**: pyramidal weakness (BMRC grade 3 or worse in limb strength) or sensory deficits interfere with cerebellar testing
+
+### SENSORY FUNCTIONS
+
+#### SUPERFICIAL SENSATION (LIGHT TOUCH AND PAIN)
+- **0**: normal
+- **1**: signs only (slightly diminished sensation on formal testing, patient not aware)
+- **2**: mild (patient aware of impaired light touch or pain but can discriminate sharp/dull)
+- **3**: moderate (impaired discrimination of sharp/dull)
+- **4**: marked (unable to discriminate between sharp/dull and/or unable to feel light touch)
+- **5**: complete loss (anesthesia)
+
+#### VIBRATION SENSE (AT THE MOST DISTAL JOINT)
+- **0**: normal
+- **1**: mild (graded tuning fork 5–7 of 8; detects more than 10 seconds but less than examiner)
+- **2**: moderate (graded tuning fork 1–4 of 8; detects between 2 and 10 sec.)
+- **3**: marked (complete loss of vibration sense)
+
+#### POSITION SENSE
+- **0**: normal
+- **1**: mild (1–2 incorrect responses, only distal joints affected)
+- **2**: moderate (misses many movements of fingers or toes; proximal joints affected)
+- **3**: marked (no perception of movement, astasia)
+
+* **LHERMITTE’S SIGN** (does not contribute to the Sensory FS score)
+  - **0**: negative
+  - **1**: positive
+
+* **PARAESTHESIAE (TINGLING)** (does not contribute to the Sensory FS score)
+  - **0**: none
+  - **1**: present
+
+#### FUNCTIONAL SYSTEM SCORE
+- **0**: normal
+- **1**: impaired superficial sensation in one or two limbs
+- **2**: mild impairment in more than two limbs, no major proprioceptive deficits
+- **3**: moderate impairment in more than two limbs with minor proprioceptive deficits
+- **4**: severe impairment in more than two limbs with significant proprioceptive deficits
+- **5**: loss of sensation in one or two limbs, significant proprioceptive deficits in most of the body below the head
+- **6**: essentially no sensation below the head
+
+### BOWEL AND BLADDER FUNCTIONS
+
+#### URINARY HESITANCY AND RETENTION
+- **0**: none
+- **1**: mild (no major impact on lifestyle)
+- **2**: moderate (urinary retention; frequent urinary tract infections)
+- **3**: severe (requires catheterization)
+- **4**: loss of function (overflow incontinence)
+
+#### URINARY URGENCY AND INCONTINENCE
+- **0**: none
+- **1**: mild (no major impact on lifestyle)
+- **2**: moderate (rare incontinence occurring no more than once a week; must wear pads)
+- **3**: severe (frequent incontinence occurring from several times a week to more than once a day; must wear urinal or pads)
+- **4**: loss of function (loss of bladder control)
+
+#### BLADDER CATHETERIZATION
+- **0**: none
+- **1**: intermittent self-catheterization
+- **2**: constant catheterization
+
+#### BOWEL DYSFUNCTION
+- **0**: none
+- **1**: mild (no incontinence, no major impact on lifestyle, mild constipation)
+- **2**: moderate (must wear pads or alter lifestyle to be near lavatory)
+- **3**: severe (in need of enemas or manual measures to evacuate bowels)
+- **4**: complete loss of function
+
+#### SEXUAL DYSFUNCTION
+**Male**
+- **0**: none
+- **1**: mild (difficulty maintaining erection during intercourse, but achieves erection and still has intercourse)
+- **2**: moderate (difficulty achieving erection, decreased libido, still has intercourse and reaches orgasm)
+- **3**: severe (marked decrease in libido, inability to achieve full erection, intercourse with difficulty, hypoorgasmia)
+- **4**: loss of function
+
+**Female**
+- **0**: none
+- **1**: mild (mild lack of lubrication, still sexually active and reaches orgasm)
+- **2**: moderate (dyspareunia, hypoorgasmia, decrease in sexual activity)
+- **3**: severe (marked decrease in sexual activity, anorgasmia)
+- **4**: loss of function
+
+**NOTE**
+When determining the EDSS step, the Bowel and Bladder FS score must be converted to a lower score as follows:
+- Bowel and Bladder FS Score: 6 → Converted Bowel and Bladder FS Score: 5
+- Bowel and Bladder FS Score: 5 → Converted Bowel and Bladder FS Score: 4
+- Bowel and Bladder FS Score: 4 → Converted Bowel and Bladder FS Score: 3
+- Bowel and Bladder FS Score: 3 → Converted Bowel and Bladder FS Score: 3
+- Bowel and Bladder FS Score: 2 → Converted Bowel and Bladder FS Score: 2
+- Bowel and Bladder FS Score: 1 → Converted Bowel and Bladder FS Score: 1
+
+Sexual dysfunction can be documented but generally does not impact the FS score due to assessment difficulties by examining physicians.
+
+#### FUNCTIONAL SYSTEM SCORE
+- **0**: normal
+- **1**: mild urinary hesitancy, urgency, and/or constipation
+- **2**: moderate urinary hesitancy/retention and/or moderate urinary urgency/incontinence and/or moderate bowel dysfunction
+- **3**: frequent urinary incontinence or intermittent self-catheterization; needs enemas or manual measures to evacuate bowels
+- **4**: in need of almost constant catheterization
+- **5**: loss of bladder or bowel function (external or indwelling catheter)
+- **6**: loss of bowel and bladder function
+
+### CEREBRAL FUNCTIONS
+
+#### DEPRESSION AND EUPHORIA
+- **0**: none
+- **1**: present (Patient complains of depression or is considered depressed or euphoric by the investigator or significant other.)
+
+**Note**: Depression and Euphoria are documented on the scoring sheet but are not taken into consideration for FS and EDSS calculation.
+
+#### DECREASE IN MENTATION
+- **0**: none
+- **1**: signs only (not apparent to patient and/or significant other)
+- **2**: mild (Patient and/or significant other report mild changes in mentation. Examples include: impaired ability to follow a rapid course of association or survey complex matters;
+impaired judgment in certain demanding situations; capable of handling routine daily activities, but unable to tolerate additional stressors; intermittently symptomatic even with
+normal levels of stress; reduced performance; tendency toward negligence due to obliviousness or fatigue.)
+- **3**: moderate (Definite abnormalities on brief mental status testing, but still oriented to person, place, and time)
+- **4**: marked (Not oriented in one or two spheres (person, place, or time); marked effect on lifestyle)
+- **5**: dementia, confusion, and/or complete disorientation
+
+#### FATIGUE
+- **0**: none
+- **1**: mild (Does not usually interfere with daily activities)
+- **2**: moderate (Interferes but does not limit daily activities for more than 50%)
+- **3**: severe (Significant limitation in daily activities (> 50% reduction))
+
+**Note**: Because fatigue is difficult to evaluate objectively, in some studies it does not contribute to the Cerebral FS score or EDSS step. Please adhere to the study’s specific
+instructions.
+
+#### FUNCTIONAL SYSTEM SCORE
+- **0**: normal
+- **1**: signs only in decrease in mentation; mild fatigue
+- **2**: mild decrease in mentation; moderate or severe fatigue
+- **3**: moderate decrease in mentation
+- **4**: marked decrease in mentation
+- **5**: dementia
+
+### AMBULATION
+
+**Unrestricted Ambulation**
+- The patient can walk a normal distance without assistance, comparable to healthy individuals of similar age and physical condition.
+- EDSS step can range from 0 to 5.0, depending on the Functional System (FS) scores.
+
+**Fully Ambulatory**
+- At least 500 meters of ambulation without assistance, but not unrestricted.
+- EDSS step can range from 2.0 to 5.0, depending on FS scores.
+- The Pyramidal and/or Cerebellar FS must be ≥ 2 to reflect this restriction in ambulation.
+
+**Ambulation < 500 Meters**
+- If the walking distance is less than 500 meters, the EDSS step must be ≥ 4.5, depending on the walking ranges provided by the ambulation score and combination of FS scores.
+- EDSS steps 5.5 to 8.0 are exclusively defined by the ability to ambulate and type of assistance required, or the ability to use a wheelchair.
+
+**Assistance Needed**
+- Definitions for EDSS steps 6.0 or 6.5 include both the type of assistance required when walking and the walking range.
+- Assistance by another person is equivalent to bilateral assistance.
+
+**Note:**
+- The ambulation score represents both the walking range and the type of assistance required.
+- This score replaces several checkboxes used previously on the scoring sheet but does not introduce new definitions.
+- Use of a wheelchair can now be scored on the scoring sheet.
+- Indicate the reported distance and time for the patient in the appropriate field on the scoring sheet, followed by the type of assistance and walking distance measured during assessment.
+
+
+### DISTANCE AND TIME REPORTED BY PATIENT
+
+**Maximal Unassisted Walking Distance**
+- Maximal unassisted walking distance reported by the patient (in meters) without rest or assistance.
+- Time required to walk the maximum distance according to the patient (in minutes).
+
+**Assistance**
+0. Without help or assistance (allowing use of an ankle-foot orthotic device, but no other assistive devices).
+1. Unilateral assistance: one stick/crutch/brace.
+2. Bilateral assistance: two sticks/crutches/braces or assistance by another person.
+3. Wheelchair.
+
+**Distance**
+- Measure the distance the patient can walk in meters.
+  - **Unassisted:** Observe walking for a minimum of 500 meters and measure time needed, if possible.
+  - **Assisted:** Observe walking with assistive devices or help from another person for a minimum of 130 meters, if possible.
+
+---
+
+### AMBULATION SCORE
+
+0. Unrestricted
+1. Fully ambulatory
+2. ≥ 300 meters but < 500 meters, without help or assistance (EDSS 4.5 or 5.0)
+3. ≥ 200 meters but < 300 meters, without help or assistance (EDSS 5.0)
+4. ≥ 100 meters but < 200 meters, without help or assistance (EDSS 5.5)
+5. Walking range < 100 meters without assistance (EDSS 6.0)
+6. Unilateral assistance, ≥ 50 meters (EDSS 6.0)
+7. Bilateral assistance, ≥ 120 meters (EDSS 6.0)
+8. Unilateral assistance, < 50 meters (EDSS 6.5)
+9. Bilateral assistance, ≥ 5 meters but < 120 meters (EDSS 6.5)
+10. Uses wheelchair without help; unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair for some 12 hours a day (EDSS 7.0)
+11. Uses wheelchair with help; unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and wheeling self (EDSS 7.5)
+12. Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms (EDSS 8.0)
+
+Expanded Disability Status Scale (EDSS)
+
+0   - Normal neurological exam (all Functional Systems [FS] grade 0)
+1.0 - No disability, minimal signs in one FS (one FS grade 1)
+1.5 - No disability, minimal signs in more than one FS (more than one FS grade 1)
+2.0 - Minimal disability in one FS (one FS grade 2, others 0 or 1)
+2.5 - Minimal disability in two FS (two FS grades 2, others 0 or 1)
+3.0 - Moderate disability in one FS (one FS grade 3, others 0 or 1) though fully ambulatory;
+or mild disability in three or four FS (three/four FS grades 2, others 0 or 1) though fully ambulatory
+3.5 - Fully ambulatory but with moderate disability in one FS (one FS grade 3) and mild disability in one or two FS (one/two FS grade 2) and others 0 or 1;
+or fully ambulatory with two FS grades 3 (others 0 or 1);
+or fully ambulatory with five FS grades 2 (others 0 or 1)
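+4.0 - Fully ambulatory without aid or rest for ≥ 500 meters; up and about some 12 hours a day despite relatively severe disability (one FS grade 4, others 0 or 1, or combinations of lesser grades exceeding limits of previous steps)
+4.5 - Ambulatory without aid or rest for ≥ 300 meters; up and about much of the day; relatively severe disability (usually one FS grade 4, others 0 or 1, or combinations of lesser grades exceeding limits of previous steps)
+5.0 - Ambulatory without aid or rest for ≥ 200 meters; disability severe enough to impair full daily activities
+5.5 - Ambulatory without aid or rest for ≥ 100 meters; disability severe enough to preclude full daily activities
+6.0 - Walking range < 100 meters without assistance; or unilateral assistance (one stick/crutch/brace) required to walk ≥ 50 meters; or bilateral assistance required to walk ≥ 120 meters
+6.5 - Unilateral assistance with walking range < 50 meters; or bilateral assistance (two sticks/crutches/braces) required to walk ≥ 5 meters but < 120 meters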
+7.0 - Unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair some 12 hours a day
+7.5 - Unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and in wheeling self
+8.0 - Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms
+8.5 - Essentially restricted to bed much of the day; has some effective use of arm(s); retains some self-care functions
+9.0 - Helpless bed patient; can communicate and eat
+9.5 - Totally helpless bed patient; unable to communicate effectively or eat/swallow
+10  - Death due to MS
+
+
@@ -0,0 +1,11 @@
+# Grammar for the model's JSON answer. `root` accepts an object with, in this order:
+#   "Reason"                - string of at most 400 characters
+#   "nicht_klassifizierbar" - boolean
+#   "EDSS"                  - number, optional
+# NOTE(review): `number` allows a sign, arbitrary magnitude and an exponent; the
+# grammar itself does not restrict EDSS to the 0-10 scale in 0.5 steps.
+EDSS-kv ::= "\"EDSS\"" space ":" space number
+Reason ::= "\"" char{0,400} "\"" space
+Reason-kv ::= "\"Reason\"" space ":" space Reason
+boolean ::= ("true" | "false") space
+char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
+decimal-part ::= [0-9]{1,16}
+integral-part ::= [0] | [1-9] [0-9]{0,15}
+nicht-klassifizierbar-kv ::= "\"nicht_klassifizierbar\"" space ":" space boolean
+number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
+root ::= "{" space Reason-kv "," space nicht-klassifizierbar-kv ( "," space ( EDSS-kv ) )? "}" space
+space ::= | " " | "\n"{1,2} [ \t]{0,20}
@@ -0,0 +1,25 @@
+Expanded Disability Status Scale (EDSS)
+
+0   - Normal neurological exam (all Functional Systems [FS] grade 0)
+1.0 - No disability, minimal signs in one FS (one FS grade 1)
+1.5 - No disability, minimal signs in more than one FS (more than one FS grade 1)
+2.0 - Minimal disability in one FS (one FS grade 2, others 0 or 1)
+2.5 - Minimal disability in two FS (two FS grades 2, others 0 or 1)
+3.0 - Moderate disability in one FS (one FS grade 3, others 0 or 1) though fully ambulatory;
+or mild disability in three or four FS (three/four FS grades 2, others 0 or 1) though fully ambulatory
+3.5 - Fully ambulatory but with moderate disability in one FS (one FS grade 3) and mild disability in one or two FS (one/two FS grade 2) and others 0 or 1;
+or fully ambulatory with two FS grades 3 (others 0 or 1);
+or fully ambulatory with five FS grades 2 (others 0 or 1)
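+4.0 - Fully ambulatory without aid or rest for ≥ 500 meters; up and about some 12 hours a day despite relatively severe disability (one FS grade 4, others 0 or 1, or combinations of lesser grades exceeding limits of previous steps)
+4.5 - Ambulatory without aid or rest for ≥ 300 meters; up and about much of the day; relatively severe disability (usually one FS grade 4, others 0 or 1, or combinations of lesser grades exceeding limits of previous steps)
+5.0 - Ambulatory without aid or rest for ≥ 200 meters; disability severe enough to impair full daily activities
+5.5 - Ambulatory without aid or rest for ≥ 100 meters; disability severe enough to preclude full daily activities
+6.0 - Walking range < 100 meters without assistance; or unilateral assistance (one stick/crutch/brace) required to walk ≥ 50 meters; or bilateral assistance required to walk ≥ 120 meters
+6.5 - Unilateral assistance with walking range < 50 meters; or bilateral assistance (two sticks/crutches/braces) required to walk ≥ 5 meters but < 120 meters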
+7.0 - Unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair some 12 hours a day
+7.5 - Unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and in wheeling self
+8.0 - Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms
+8.5 - Essentially restricted to bed much of the day; has some effective use of arm(s); retains some self-care functions
+9.0 - Helpless bed patient; can communicate and eat
+9.5 - Totally helpless bed patient; unable to communicate effectively or eat/swallow
+10  - Death due to MS
@@ -263,3 +263,120 @@ plt.legend(frameon=False, loc='upper center', bbox_to_anchor=(0.5, -0.05))
 plt.tight_layout()
 plt.show()
 ##
+
+
+
+
+# %% Patients per number of visits (bar chart)
+# Bar chart of hard-coded summary counts: one bar per visit number, bar height
+# is the patient count listed for that visit number.
+import matplotlib.pyplot as plt
+
+# data['patient_count'][i] belongs to data['Visit'][i]; the table is listed
+# in descending visit order (9 down to 1).
+data = {
+    'Visit': [9, 8, 7, 6, 5, 4, 3, 2, 1],
+    'patient_count': [2, 3, 3, 6, 13, 17, 28, 24, 32]
+}
+
+# Sort the (visit, count) pairs by visit so the bars run 1..9 from left to
+# right. Sorting the pairs together keeps every count attached to its own
+# visit number; the key restricts the ordering to the visit number only.
+pairs = sorted(zip(data['Visit'], data['patient_count']), key=lambda pair: pair[0])
+visit_sorted = [visit for visit, _ in pairs]
+count_sorted = [count for _, count in pairs]
+
+# Draw the chart once, directly in sorted order.
+fig, ax = plt.subplots(figsize=(10, 6))
+bars = ax.bar(visit_sorted, count_sorted, color='darkblue', label='Patients by Visit Count')
+
+# Axis labels; the title is currently disabled.
+ax.set_xlabel('Number of Visits', fontsize=12)
+ax.set_ylabel('Number of Unique Patients', fontsize=12)
+#ax.set_title('Number of Patients by Visit Number', fontsize=14)
+
+# Add legend
+ax.legend()
+
+# Dashed horizontal grid lines, and one tick per visit number so that every
+# integer on the x-axis is labelled.
+ax.grid(axis='y', linestyle='--', alpha=0.7)
+plt.xticks(visit_sorted)
+
+# Show the plot
+plt.tight_layout()
+plt.show()
+
+##
+
+# %% Patientjourney Bubble chart
+# One bubble per visit count on a single horizontal line; the bubble area
+# scales with the patient count for that visit count. The figure is written to
+# "patient_journey.svg" and then displayed.
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+
+# Global text styling: rcParams are process-wide, so this also affects any
+# figure created after this cell has run.
+mpl.rcParams["font.family"] = "DejaVu Sans"   # or "Arial", "Calibri", "Times New Roman", ...
+mpl.rcParams["font.size"] = 12                # default size for text
+mpl.rcParams["axes.titlesize"] = 14
+mpl.rcParams["axes.titleweight"] = "bold"
+
+
+# Hard-coded summary counts: patient_count[i] belongs to visits[i].
+visits = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
+patient_count = np.array([32, 24, 28, 17, 13, 6, 3, 3, 2])
+
+# "Remaining" = patients with >= that many visits, i.e. the cumulative sum of
+# patient_count taken from the right-hand end.
+remaining = patient_count[::-1].cumsum()[::-1]
+
+# --- Plot ---
+fig, ax = plt.subplots(figsize=(12, 3))
+
+y = 0.0  # all bubbles on one horizontal line
+
+# Horizontal line
+ax.hlines(y, visits.min() - 0.4, visits.max() + 0.4, color="#1f77b4", linewidth=3)
+
+# Bubble sizes (scale as needed)
+# (Matplotlib scatter uses area in points^2)
+sizes = patient_count * 35  # tweak this multiplier if you want bigger/smaller bubbles
+
+# np.full keeps the y coordinates as floats; np.full_like(visits, y) would
+# inherit the integer dtype of `visits` and truncate any non-integer y.
+ax.scatter(visits, np.full(visits.shape, y), s=sizes, color="#1f77b4", zorder=3)
+
+# Title (currently disabled)
+#ax.set_title("Patient Journey by Visit Count", fontsize=14, pad=18)
+
+# Top labels: "1 visit", "2 visits", ...
+for x in visits:
+    label = f"{x} visit" if x == 1 else f"{x} visits"
+    ax.text(x, y + 0.18, label, ha="center", va="bottom", fontsize=10)
+
+# Bottom labels: "X patients" and "Y remaining"
+for x, pc, rem in zip(visits, patient_count, remaining):
+    ax.text(x, y - 0.20, f"{pc} patients", ha="center", va="top", fontsize=9)
+    ax.text(x, y - 0.32, f"{rem} remaining", ha="center", va="top", fontsize=9)
+
+# Cosmetics: remove axes, keep spacing nice
+ax.set_xlim(visits.min() - 0.6, visits.max() + 0.6)
+ax.set_ylim(-0.5, 0.35)
+ax.set_xticks([])
+ax.set_yticks([])
+for spine in ax.spines.values():
+    spine.set_visible(False)
+
+plt.tight_layout()
+# Save BEFORE showing: with a blocking backend the figure may already be closed
+# when plt.show() returns, and saving at that point would write a blank image.
+# Saving through `fig` also avoids depending on which figure is "current".
+fig.savefig("patient_journey.svg", format="svg", bbox_inches="tight")
+plt.show()
+##
+
Author	SHA1	Message	Date
shahin	c9cf9ae9a0	optimized results and new benchmark	2026-05-29 00:42:40 +02:00
shahin	1b7c6a3852	adjustment to triton	2026-05-19 10:21:24 +02:00
shahin	bb9fcf20ae	adjusting the script with new paths	2026-05-19 10:13:29 +02:00
shahin	98df7c70f1	New Organised one	2026-05-19 10:03:52 +02:00
shahin	69f6e76bfe	clean gitignore	2026-05-19 09:23:31 +02:00
shahin	590f2cd68e	Added Loop for multiple models.	2026-05-16 16:50:33 +02:00
shahin	f6ec60e685	isabella box and Error disagreement plot	2026-05-04 16:41:42 +02:00
shahin	c9db7b5163	backup	2026-05-04 14:48:03 +02:00
shahin	09808f1fd4	merge the changes of dashboard with GAP	2026-05-04 14:46:47 +02:00
shahin	90d411f086	Modifications	2026-04-27 11:52:53 +02:00
shahin	816c50e467	Config Dashbprard	2026-02-23 18:19:50 +01:00
shahin	118e3e63b3	refinement	2026-02-23 15:06:54 +01:00
shahin	99862629b8	update gitignore	2026-02-23 00:43:33 +01:00
shahin	9cc80cd3e6	Audit code	2026-02-23 00:42:41 +01:00
shahin	424d38ad1c	certainty Delta show	2026-02-18 17:12:31 +01:00
shahin	f1d22b28ad	updated plot certainty	2026-02-13 09:22:53 +01:00
shahin	8e4a43c557	add certainty	2026-02-12 13:39:36 +01:00
shahin	2f507bcf20	Adjsuting and cleaning	2026-02-08 01:59:38 +01:00
shahin	f4bf37f71c	show directional errors Directional Errors of each functional system.	2026-02-08 01:27:48 +01:00
shahin	bc63d1ee72	added new confusion matrix	2026-02-04 18:01:11 +01:00
shahin	c2ccb8cd11	update gitignore	2026-02-04 15:29:56 +01:00
shahin	b2e9ccd2b6	adding some visualizations	2026-01-26 02:02:19 +01:00
shahin	2f1bd2bfd0	save	2026-01-20 14:47:53 +01:00