23 Commits

Author SHA1 Message Date
shahin c9cf9ae9a0 optimized results and new benchmark 2026-05-29 00:42:40 +02:00
shahin 1b7c6a3852 adjustment to triton 2026-05-19 10:21:24 +02:00
shahin bb9fcf20ae adjusting the script with new paths 2026-05-19 10:13:29 +02:00
shahin 98df7c70f1 New Organised one 2026-05-19 10:03:52 +02:00
shahin 69f6e76bfe clean gitignore 2026-05-19 09:23:31 +02:00
shahin 590f2cd68e Added Loop for multiple models. 2026-05-16 16:50:33 +02:00
shahin f6ec60e685 isabella box and Error disagreement plot 2026-05-04 16:41:42 +02:00
shahin c9db7b5163 backup 2026-05-04 14:48:03 +02:00
shahin 09808f1fd4 merge the changes of dashboard with GAP 2026-05-04 14:46:47 +02:00
shahin 90d411f086 Modifications 2026-04-27 11:52:53 +02:00
shahin 816c50e467 Config Dashbprard 2026-02-23 18:19:50 +01:00
shahin 118e3e63b3 refinement 2026-02-23 15:06:54 +01:00
shahin 99862629b8 update gitignore 2026-02-23 00:43:33 +01:00
shahin 9cc80cd3e6 Audit code 2026-02-23 00:42:41 +01:00
shahin 424d38ad1c certainty Delta show 2026-02-18 17:12:31 +01:00
shahin f1d22b28ad updated plot certainty 2026-02-13 09:22:53 +01:00
shahin 8e4a43c557 add certainty 2026-02-12 13:39:36 +01:00
shahin 2f507bcf20 Adjsuting and cleaning 2026-02-08 01:59:38 +01:00
shahin f4bf37f71c show directional errors
Directional Errors of each functional system.
2026-02-08 01:27:48 +01:00
shahin bc63d1ee72 added new confusion matrix 2026-02-04 18:01:11 +01:00
shahin c2ccb8cd11 update gitignore 2026-02-04 15:29:56 +01:00
shahin b2e9ccd2b6 adding some visualizations 2026-01-26 02:02:19 +01:00
shahin 2f1bd2bfd0 save 2026-01-20 14:47:53 +01:00
15 changed files with 22478 additions and 972 deletions
+84 -12
View File
@@ -1,16 +1,88 @@
# 1. Broad Ignores
/Data/*
/attach/*
/results/*
/enarcelona/*
.env
# =========================
# Python
# =========================
__pycache__/
*.pyc
*.py[cod]
*$py.class
.ipynb_checkpoints/
# 2. Ignore virtual environments COMPLETELY
# This must come BEFORE the unignore rule
# =========================
# Virtual environments
# =========================
env/
env*/
venv/
.venv/
enarcelona/
# 3. The "Unignore" rule (Whitelisting)
# We only unignore .py files that aren't already blocked by the rules above
!**/*.py
# =========================
# Secrets
# =========================
.env
*.env
# =========================
# Patient data / sensitive data
# =========================
Data/
data/raw/
data/processed/
data/ground_truth/
reference/
# =========================
# Generated results and logs
# =========================
results/
results_edss_benchmark/
*.log
# =========================
# Large/generated file types
# =========================
*.csv
*.tsv
*.json
*.jsonl
*.xlsx
*.xls
*.png
*.PNG
*.jpg
*.jpeg
*.svg
*.pdf
# =========================
# Temporary / backup files
# =========================
*.tmp
*.bak
*.orig
.DS_Store
# =========================
# Keep important code/config/docs
# =========================
!README.md
!requirements.txt
!*.py
!*.md
!*.yml
!*.yaml
!*.toml
# Keep prompt templates / schemas if safe to publish
!prompts/
!prompts/**
!attach/
!attach/*.gbnf
!attach/just_edss_text.txt
!attach/Komplett.txt
# Keep example/synthetic data only
!data/
!data/example/
!data/example/**
!Data/example/
!Data/example/**
-748
View File
@@ -1,748 +0,0 @@
# %% Scatter
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load your data from TSV file
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_results+MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.tsv'
df = pd.read_csv(file_path, sep='\t')
# Replace comma with dot for numeric conversion in GT_EDSS and LLM_Results
df['GT_EDSS'] = df['GT_EDSS'].astype(str).str.replace(',', '.')
df['LLM_Results'] = df['LLM_Results'].astype(str).str.replace(',', '.')
# Convert to float (handle invalid entries gracefully)
df['GT_EDSS'] = pd.to_numeric(df['GT_EDSS'], errors='coerce')
df['LLM_Results'] = pd.to_numeric(df['LLM_Results'], errors='coerce')
# Drop rows where either column is NaN
df_clean = df.dropna(subset=['GT_EDSS', 'LLM_Results'])
# Create scatter plot
plt.figure(figsize=(8, 6))
plt.scatter(df_clean['GT_EDSS'], df_clean['LLM_Results'], alpha=0.7, color='blue')
# Add labels and title
plt.xlabel('GT_EDSS')
plt.ylabel('LLM_Results')
plt.title('Comparison of GT_EDSS vs LLM_Results')
# Optional: Add a diagonal line for reference (perfect prediction)
plt.plot([0, max(df_clean['GT_EDSS'])], [0, max(df_clean['GT_EDSS'])], color='red', linestyle='--', label='Perfect Prediction')
plt.legend()
# Show plot
plt.grid(True)
plt.tight_layout()
plt.show()
##
# %% Bland-Altman
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
# Load your data from TSV file
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_results+MS_Briefe_400_with_unique_id_SHA3_explore_cleaned.tsv'
df = pd.read_csv(file_path, sep='\t')
# Replace comma with dot for numeric conversion in GT_EDSS and LLM_Results
df['GT_EDSS'] = df['GT_EDSS'].astype(str).str.replace(',', '.')
df['LLM_Results'] = df['LLM_Results'].astype(str).str.replace(',', '.')
# Convert to float (handle invalid entries gracefully)
df['GT_EDSS'] = pd.to_numeric(df['GT_EDSS'], errors='coerce')
df['LLM_Results'] = pd.to_numeric(df['LLM_Results'], errors='coerce')
# Drop rows where either column is NaN
df_clean = df.dropna(subset=['GT_EDSS', 'LLM_Results'])
# Create Bland-Altman plot
f, ax = plt.subplots(1, figsize=(8, 5))
sm.graphics.mean_diff_plot(df_clean['GT_EDSS'], df_clean['LLM_Results'], ax=ax)
# Add labels and title
ax.set_title('Bland-Altman Plot: GT_EDSS vs LLM_Results')
ax.set_xlabel('Mean of GT_EDSS and LLM_Results')
ax.set_ylabel('Difference between GT_EDSS and LLM_Results')
# Display Bland-Altman plot
plt.tight_layout()
plt.show()
# Print some statistics
mean_diff = np.mean(df_clean['GT_EDSS'] - df_clean['LLM_Results'])
std_diff = np.std(df_clean['GT_EDSS'] - df_clean['LLM_Results'])
print(f"Mean difference: {mean_diff:.3f}")
print(f"Standard deviation of differences: {std_diff:.3f}")
print(f"95% Limits of Agreement: [{mean_diff - 1.96*std_diff:.3f}, {mean_diff + 1.96*std_diff:.3f}]")
##
# %% Confusion matrix
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
# Load your data from TSV file
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t')
# Replace comma with dot for numeric conversion in GT.EDSS and result.EDSS
df['GT.EDSS'] = df['GT.EDSS'].astype(str).str.replace(',', '.')
df['result.EDSS'] = df['result.EDSS'].astype(str).str.replace(',', '.')
# Convert to float (handle invalid entries gracefully)
df['GT.EDSS'] = pd.to_numeric(df['GT.EDSS'], errors='coerce')
df['result.EDSS'] = pd.to_numeric(df['result.EDSS'], errors='coerce')
# Drop rows where either column is NaN
df_clean = df.dropna(subset=['GT.EDSS', 'result.EDSS'])
# For confusion matrix, we need to categorize the values
# Let's create categories up to 10 (0-1, 1-2, 2-3, ..., 9-10)
def categorize_edss(value):
    """Map a numeric EDSS score to its one-point-wide category label.

    Every bin includes its upper bound, so ``1.0`` belongs to ``'0-1'`` and
    ``1.5`` to ``'1-2'``. Anything at or below 1.0 (negative input included)
    falls into ``'0-1'``; anything above 10.0 is labelled ``'10+'``.

    Returns ``np.nan`` when ``pd.isna(value)`` is true, otherwise the label
    string.
    """
    if pd.isna(value):
        return np.nan

    # (inclusive upper bound, label) pairs in ascending order; the first
    # bound that is not exceeded decides the category.
    upper_bounds = (
        (1.0, '0-1'),
        (2.0, '1-2'),
        (3.0, '2-3'),
        (4.0, '3-4'),
        (5.0, '4-5'),
        (6.0, '5-6'),
        (7.0, '6-7'),
        (8.0, '7-8'),
        (9.0, '8-9'),
        (10.0, '9-10'),
    )
    for bound, label in upper_bounds:
        if value <= bound:
            return label
    return '10+'
# Work on an explicit copy: df_clean is the result of df.dropna(...), and
# adding columns to such a result can trigger pandas' SettingWithCopyWarning.
df_clean = df_clean.copy()

# Create categorical versions
df_clean['GT.EDSS_cat'] = df_clean['GT.EDSS'].apply(categorize_edss)
df_clean['result.EDSS_cat'] = df_clean['result.EDSS'].apply(categorize_edss)

# Remove any NaN categories
df_clean = df_clean.dropna(subset=['GT.EDSS_cat', 'result.EDSS_cat'])

# One ordered label list shared by the matrix and both heatmap axes, so the
# three can never drift apart.
edss_labels = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10']

# Create confusion matrix (rows: ground truth, columns: LLM output)
cm = confusion_matrix(
    df_clean['GT.EDSS_cat'],
    df_clean['result.EDSS_cat'],
    labels=edss_labels,
)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=edss_labels,
    yticklabels=edss_labels,
)
plt.title('Confusion Matrix: Ground truth EDSS vs inferred EDSS (Categorized 0-10)')
plt.xlabel('LLM Generated EDSS')
plt.ylabel('Ground Truth EDSS')
plt.tight_layout()
plt.show()

# Print classification report
print("Classification Report:")
print(classification_report(df_clean['GT.EDSS_cat'], df_clean['result.EDSS_cat']))

# Print raw counts
print("\nConfusion Matrix (Raw Counts):")
print(cm)
##
# %% Classification
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import numpy as np
# Load your data from TSV file
file_path ='/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t')
# Check data structure
print("Data shape:", df.shape)
print("First few rows:")
print(df.head())
print("\nColumn names:")
for col in df.columns:
print(f" {col}")
# Function to safely convert to boolean
def safe_bool_convert(series):
    """Convert a Series of mixed true/false spellings to a boolean Series.

    Values are compared as stripped, lower-cased strings.

    Recognised as True:  'true', '1', '1.0', 'yes', 'y'
    Recognised as False: 'false', '0', '0.0', 'no', 'n'
    Everything else (missing values, unknown spellings) becomes False.

    The float spellings '1.0' / '0.0' are accepted because ``astype(str)``
    renders a float column that way; without them every flag in such a
    column would silently turn into False.

    Returns a Series with real ``bool`` dtype and the same index as the input.
    """
    normalized = series.astype(str).str.strip().str.lower()

    # Only the truthy spellings need listing: anything not in this set,
    # including the explicit false spellings, maps to False.
    truthy = {'true', '1', '1.0', 'yes', 'y'}
    return normalized.isin(truthy)
# Convert columns safely
if 'result.klassifizierbar' in df.columns:
print("\nresult.klassifizierbar column info:")
print(df['result.klassifizierbar'].head(10))
print("Unique values:", df['result.klassifizierbar'].unique())
df['result.klassifizierbar'] = safe_bool_convert(df['result.klassifizierbar'])
print("After conversion:")
print(df['result.klassifizierbar'].value_counts())
if 'GT.klassifizierbar' in df.columns:
print("\nGT.klassifizierbar column info:")
print(df['GT.klassifizierbar'].head(10))
print("Unique values:", df['GT.klassifizierbar'].unique())
df['GT.klassifizierbar'] = safe_bool_convert(df['GT.klassifizierbar'])
print("After conversion:")
print(df['GT.klassifizierbar'].value_counts())
# Create bar chart showing only True values for klassifizierbar
if 'result.klassifizierbar' in df.columns and 'GT.klassifizierbar' in df.columns:
# Get counts for True values only
llm_true_count = df['result.klassifizierbar'].sum()
gt_true_count = df['GT.klassifizierbar'].sum()
# Plot using matplotlib directly
fig, ax = plt.subplots(figsize=(8, 6))
x = np.arange(2)
width = 0.35
bars1 = ax.bar(x[0] - width/2, llm_true_count, width, label='LLM', color='skyblue', alpha=0.8)
bars2 = ax.bar(x[1] + width/2, gt_true_count, width, label='GT', color='lightcoral', alpha=0.8)
# Add value labels on bars
ax.annotate(f'{llm_true_count}',
xy=(x[0], llm_true_count),
xytext=(0, 3),
textcoords="offset points",
ha='center', va='bottom')
ax.annotate(f'{gt_true_count}',
xy=(x[1], gt_true_count),
xytext=(0, 3),
textcoords="offset points",
ha='center', va='bottom')
ax.set_xlabel('Classification Status (klassifizierbar)')
ax.set_ylabel('Count')
ax.set_title('True Values Comparison: LLM vs GT for "klassifizierbar"')
ax.set_xticks(x)
ax.set_xticklabels(['LLM', 'GT'])
ax.legend()
plt.tight_layout()
plt.show()
# Create confusion matrix if both columns exist
if 'result.klassifizierbar' in df.columns and 'GT.klassifizierbar' in df.columns:
try:
# Ensure both columns are boolean
llm_bool = df['result.klassifizierbar'].fillna(False).astype(bool)
gt_bool = df['GT.klassifizierbar'].fillna(False).astype(bool)
cm = confusion_matrix(gt_bool, llm_bool)
# Plot confusion matrix
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
xticklabels=['False ', 'True '],
yticklabels=['False', 'True '],
ax=ax)
ax.set_xlabel('LLM Predictions ')
ax.set_ylabel('GT Labels ')
ax.set_title('Confusion Matrix: LLM vs GT for "klassifizierbar"')
plt.tight_layout()
plt.show()
print("Confusion Matrix:")
print(cm)
except Exception as e:
print(f"Error creating confusion matrix: {e}")
# Show final data info
print("\nFinal DataFrame info:")
print(df[['result.klassifizierbar', 'GT.klassifizierbar']].info())
##
# %% Boxplot
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Load your data from TSV file
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/join_results_unique.tsv'
df = pd.read_csv(file_path, sep='\t')
# Replace comma with dot for numeric conversion in GT.EDSS and result.EDSS
df['GT.EDSS'] = df['GT.EDSS'].astype(str).str.replace(',', '.')
df['result.EDSS'] = df['result.EDSS'].astype(str).str.replace(',', '.')
# Convert to float (handle invalid entries gracefully)
df['GT.EDSS'] = pd.to_numeric(df['GT.EDSS'], errors='coerce')
df['result.EDSS'] = pd.to_numeric(df['result.EDSS'], errors='coerce')
# Drop rows where either column is NaN
df_clean = df.dropna(subset=['GT.EDSS', 'result.EDSS'])
# 1. DEFINE CATEGORY ORDER
# This ensures the X-axis is numerically logical (0-1 comes before 1-2)
category_order = ['0-1', '1-2', '2-3', '3-4', '4-5', '5-6', '6-7', '7-8', '8-9', '9-10', '10+']
# Convert the column to a Categorical type with the specific order
df_clean['GT.EDSS_cat'] = pd.Categorical(df_clean['GT.EDSS'].apply(categorize_edss),
categories=category_order,
ordered=True)
plt.figure(figsize=(14, 8))
# 2. ADD HUE FOR LEGEND
# Assigning x to 'hue' allows Seaborn to generate a legend automatically
box_plot = sns.boxplot(
data=df_clean,
x='GT.EDSS_cat',
y='result.EDSS',
hue='GT.EDSS_cat', # Added hue
palette='viridis',
linewidth=1.5,
legend=True # Ensure legend is enabled
)
# 3. CUSTOMIZE PLOT
plt.title('Distribution of result.EDSS by GT.EDSS Category', fontsize=18, pad=20)
plt.xlabel('Ground Truth EDSS Category', fontsize=14)
plt.ylabel('LLM Predicted EDSS', fontsize=14)
# Move legend to the side or top
plt.legend(title="EDSS Categories", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45, ha='right', fontsize=10)
plt.grid(True, axis='y', alpha=0.3)
plt.tight_layout()
plt.show()
##
# %% Postprocessing column names
import pandas as pd

# Read the TSV file
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t')

# Mapping from the original (mostly German) ground-truth column names to the
# GT.* names used by the analysis cells.
column_mapping = {
    'EDSS': 'GT.EDSS',
    'klassifizierbar': 'GT.klassifizierbar',
    'Sehvermögen': 'GT.VISUAL_OPTIC_FUNCTIONS',
    'Cerebellum': 'GT.CEREBELLAR_FUNCTIONS',
    'Hirnstamm': 'GT.BRAINSTEM_FUNCTIONS',
    'Sensibiliät': 'GT.SENSORY_FUNCTIONS',
    'Pyramidalmotorik': 'GT.PYRAMIDAL_FUNCTIONS',
    'Ambulation': 'GT.AMBULATION',
    'Cerebrale_Funktion': 'GT.CEREBRAL_FUNCTIONS',
    'Blasen-_und_Mastdarmfunktion': 'GT.BOWEL_AND_BLADDER_FUNCTIONS',
}

# Record which source columns are present BEFORE renaming. After the rename
# the old names are gone from df.columns, so checking afterwards would never
# report anything.
renamed = {
    old_name: new_name
    for old_name, new_name in column_mapping.items()
    if old_name in df.columns
}

# Rename columns
df = df.rename(columns=column_mapping)

# Save the modified dataframe back to the same TSV file (overwrites the input)
df.to_csv(file_path, sep='\t', index=False)

print("Columns have been successfully renamed!")
print("Renamed columns:")
for old_name, new_name in renamed.items():
    print(f" {old_name} -> {new_name}")
##
# %% Styled table
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import dataframe_image as dfi
# Load data
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
# 1. Identify all GT and result columns
gt_columns = [col for col in df.columns if col.startswith('GT.')]
result_columns = [col for col in df.columns if col.startswith('result.')]
print("GT Columns found:", gt_columns)
print("Result Columns found:", result_columns)
# 2. Create proper mapping between GT and result columns
# Handle various naming conventions (spaces, underscores, etc.)
column_mapping = {}
for gt_col in gt_columns:
base_name = gt_col.replace('GT.', '')
# Clean the base name for matching - remove spaces, underscores, etc.
# Try different matching approaches
candidates = [
f'result.{base_name}', # Exact match
f'result.{base_name.replace(" ", "_")}', # With underscores
f'result.{base_name.replace("_", " ")}', # With spaces
f'result.{base_name.replace(" ", "")}', # No spaces
f'result.{base_name.replace("_", "")}' # No underscores
]
# Also try case-insensitive matching
candidates.append(f'result.{base_name.lower()}')
candidates.append(f'result.{base_name.upper()}')
# Try to find matching result column
matched = False
for candidate in candidates:
if candidate in result_columns:
column_mapping[gt_col] = candidate
matched = True
break
# If no exact match found, try partial matching
if not matched:
# Try to match by removing special characters and comparing
base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' '])
for result_col in result_columns:
result_base = result_col.replace('result.', '')
result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' '])
if base_clean.lower() == result_clean.lower():
column_mapping[gt_col] = result_col
matched = True
break
print("Column mapping:", column_mapping)
# 3. Faster, vectorized computation using the corrected mapping
data_list = []
for gt_col, result_col in column_mapping.items():
print(f"Processing {gt_col} vs {result_col}")
# Convert to numeric, forcing errors to NaN
s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)
# Calculate matches (abs difference <= 0.5)
diff = np.abs(s1 - s2)
matches = (diff <= 0.5).sum()
# Determine the denominator (total valid comparisons)
valid_count = diff.notna().sum()
if valid_count > 0:
percentage = (matches / valid_count) * 100
else:
percentage = 0
# Extract clean base name for display
base_name = gt_col.replace('GT.', '')
data_list.append({
'GT': base_name,
'Match %': round(percentage, 1)
})
# 4. Prepare Data
match_df = pd.DataFrame(data_list)
# Clean up labels: Replace underscores with spaces and capitalize
match_df['GT'] = match_df['GT'].str.replace('_', ' ').str.title()
match_df = match_df.sort_values('Match %', ascending=False)
# 5. Create a "Beautiful" Table using Seaborn Heatmap
def create_luxury_table(df, output_file="edss_agreement.png"):
# Set the aesthetic style
sns.set_theme(style="white", font="sans-serif")
# Prepare data for heatmap
plot_data = df.set_index('GT')[['Match %']]
# Initialize the figure
# Height is dynamic based on number of rows
fig, ax = plt.subplots(figsize=(8, len(df) * 0.6))
# Create a custom diverging color map (Deep Red -> Mustard -> Emerald)
# This looks more professional than standard 'RdYlGn'
cmap = sns.diverging_palette(15, 135, s=80, l=55, as_cmap=True)
# Draw the heatmap
sns.heatmap(
plot_data,
annot=True,
fmt=".1f",
cmap=cmap,
center=85, # Centers the color transition
vmin=50, vmax=100, # Range of the gradient
linewidths=2,
linecolor='white',
cbar=False, # Remove color bar for a "table" look
annot_kws={"size": 14, "weight": "bold", "family": "sans-serif"}
)
# Styling the Axes (Turning the heatmap into a table)
ax.set_xlabel("")
ax.set_ylabel("")
ax.xaxis.tick_top() # Move "Match %" label to top
ax.set_xticklabels(['Agreement (%)'], fontsize=14, fontweight='bold', color='#2c3e50')
ax.tick_params(axis='y', labelsize=12, labelcolor='#2c3e50', length=0)
# Add a thin border around the plot
for _, spine in ax.spines.items():
spine.set_visible(True)
spine.set_color('#ecf0f1')
plt.title('EDSS Subcategory Consistency Analysis', fontsize=16, pad=40, fontweight='bold', color='#2c3e50')
# Add a subtle footer
plt.figtext(0.5, 0.0, "Tolerance: ±0.5 points",
wrap=True, horizontalalignment='center', fontsize=10, color='gray', style='italic')
# Save with high resolution
plt.tight_layout()
plt.savefig(output_file, dpi=300, bbox_inches='tight')
print(f"Beautiful table saved as {output_file}")
# Execute: render the agreement table and save it as PNG.
# (The former `save_styled_table(match_df)` call was removed: no function of
# that name is defined in this script, so it raised NameError before the SVG
# export below could run.)
create_luxury_table(match_df)

# 6. Save as SVG
# create_luxury_table does not close its figure, so it is still the current
# figure here and the same table is written again in SVG format.
plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
print("Successfully saved agreement_table.svg")

# Show plot if running in a GUI environment
plt.show()
##
# %% Time Plot
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
# Load the TSV file
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t')
# Extract the inference_time_sec column
inference_times = df['inference_time_sec'].dropna() # Remove NaN values
# Calculate statistics
mean_time = inference_times.mean()
std_time = inference_times.std()
median_time = np.median(inference_times)
# Create the histogram
fig, ax = plt.subplots(figsize=(10, 6))
# Create histogram with bins of 1 second width
min_time = int(inference_times.min())
max_time = int(inference_times.max()) + 1
bins = np.arange(min_time, max_time + 1, 1) # Bins of 1 second width
# Create histogram with counts (not probability density)
n, bins, patches = ax.hist(inference_times, bins=bins, color='lightblue', alpha=0.7, edgecolor='black', linewidth=0.5)
# Generate Gaussian curve for fit
x = np.linspace(inference_times.min(), inference_times.max(), 100)
# Scale Gaussian to match histogram counts
gaussian_counts = stats.norm.pdf(x, mean_time, std_time) * len(inference_times) * (bins[1] - bins[0])
# Plot Gaussian fit
ax.plot(x, gaussian_counts, color='red', linewidth=2, label=f'Gaussian Fit (μ={mean_time:.1f}s, σ={std_time:.1f}s)')
# Add vertical lines for mean and median
ax.axvline(mean_time, color='blue', linestyle='--', linewidth=2, label=f'Mean = {mean_time:.1f}s')
ax.axvline(median_time, color='green', linestyle='--', linewidth=2, label=f'Median = {median_time:.1f}s')
# Add standard deviation as vertical lines
ax.axvline(mean_time + std_time, color='saddlebrown', linestyle=':', linewidth=1, alpha=0.7, label=f'+1σ = {mean_time + std_time:.1f}s')
ax.axvline(mean_time - std_time, color='saddlebrown', linestyle=':', linewidth=1, alpha=0.7, label=f'-1σ = {mean_time - std_time:.1f}s')
ax.set_xlabel('Inference Time (seconds)')
ax.set_ylabel('Frequency')
ax.set_title('Inference Time Distribution with Gaussian Fit')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
##
# %% Dashboard
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import numpy as np
# Load the data
file_path = '/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv'
df = pd.read_csv(file_path, sep='\t')
# Rename columns to remove 'result.' prefix and handle spaces
column_mapping = {}
for col in df.columns:
if col.startswith('result.'):
new_name = col.replace('result.', '')
# Handle spaces in column names (replace with underscores if needed)
new_name = new_name.replace(' ', '_')
column_mapping[col] = new_name
df = df.rename(columns=column_mapping)
# Convert MedDatum to datetime
df['MedDatum'] = pd.to_datetime(df['MedDatum'])
# Check what columns actually exist in the dataset
print("Available columns:")
print(df.columns.tolist())
print("\nFirst few rows:")
print(df.head())
# Hardcode specific patient names
patient_names = ['bc55b1b2']
# Define the functional systems (columns to plot) - adjust based on actual column names
functional_systems = ['EDSS', 'Visual', 'Sensory', 'Motor', 'Brainstem', 'Cerebellar', 'Autonomic', 'Bladder', 'Intellectual']
# Create subplots horizontally (2 columns, as many rows as needed)
num_plots = len(functional_systems)
num_cols = 2
num_rows = (num_plots + num_cols - 1) // num_cols  # Ceiling division

# squeeze=False makes plt.subplots return a 2-D array of Axes for every grid
# shape, so one flatten() yields a flat sequence indexable by plot number.
# The previous special-casing wrapped the (already 1-D) single-row array in a
# list when num_plots == 1, which would have made axes[0] an array instead of
# an Axes.
fig, axes = plt.subplots(
    num_rows,
    num_cols,
    figsize=(15, 4 * num_rows),
    sharex=False,
    squeeze=False,
)
axes = axes.flatten()
# Plot for the hardcoded patient
for i, system in enumerate(functional_systems):
# Filter data for this specific patient
patient_data = df[df['unique_id'] == patient_names[0]].sort_values('MedDatum')
# Check if patient data exists
if patient_data.empty:
print(f"No data found for patient: {patient_names[0]}")
continue
# Check if the system column exists in the data
if system in patient_data.columns:
# Plot the specific functional system
if not patient_data[system].isna().all():
axes[i].plot(patient_data['MedDatum'], patient_data[system], marker='o', linewidth=2, label=system)
axes[i].set_ylabel('Score')
axes[i].set_title(f'Functional System: {system}')
axes[i].grid(True, alpha=0.3)
axes[i].legend()
else:
axes[i].set_title(f'Functional System: {system} (No data)')
axes[i].set_ylabel('Score')
axes[i].grid(True, alpha=0.3)
else:
# Try to find column with similar name (case insensitive)
found_column = None
for col in df.columns:
if system.lower() in col.lower():
found_column = col
break
if found_column:
print(f"Found similar column: {found_column}")
if not patient_data[found_column].isna().all():
axes[i].plot(patient_data['MedDatum'], patient_data[found_column], marker='o', linewidth=2, label=found_column)
axes[i].set_ylabel('Score')
axes[i].set_title(f'Functional System: {system} (found as: {found_column})')
axes[i].grid(True, alpha=0.3)
axes[i].legend()
else:
axes[i].set_title(f'Functional System: {system} (Column not found)')
axes[i].set_ylabel('Score')
axes[i].grid(True, alpha=0.3)
# Hide empty subplots
for i in range(len(functional_systems), len(axes)):
axes[i].set_visible(False)
# Set x-axis label for the last row only
for i in range(len(functional_systems)):
if i >= len(axes) - num_cols: # Last row
axes[i].set_xlabel('Date')
# Force date formatting on all axes
for ax in axes:
ax.tick_params(axis='x', rotation=45)
ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(plt.matplotlib.dates.MonthLocator())
# Automatically format x-axis dates
plt.gcf().autofmt_xdate()
plt.tight_layout()
plt.show()
##
-135
View File
@@ -1,135 +0,0 @@
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import dataframe_image as dfi
# Load data
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
# 1. Identify all GT and result columns
gt_columns = [col for col in df.columns if col.startswith('GT.')]
result_columns = [col for col in df.columns if col.startswith('result.')]
print("GT Columns found:", gt_columns)
print("Result Columns found:", result_columns)
# 2. Create proper mapping between GT and result columns
# Handle various naming conventions (spaces, underscores, etc.)
column_mapping = {}
for gt_col in gt_columns:
base_name = gt_col.replace('GT.', '')
# Clean the base name for matching - remove spaces, underscores, etc.
# Try different matching approaches
candidates = [
f'result.{base_name}', # Exact match
f'result.{base_name.replace(" ", "_")}', # With underscores
f'result.{base_name.replace("_", " ")}', # With spaces
f'result.{base_name.replace(" ", "")}', # No spaces
f'result.{base_name.replace("_", "")}' # No underscores
]
# Also try case-insensitive matching
candidates.append(f'result.{base_name.lower()}')
candidates.append(f'result.{base_name.upper()}')
# Try to find matching result column
matched = False
for candidate in candidates:
if candidate in result_columns:
column_mapping[gt_col] = candidate
matched = True
break
# If no exact match found, try partial matching
if not matched:
# Try to match by removing special characters and comparing
base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' '])
for result_col in result_columns:
result_base = result_col.replace('result.', '')
result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' '])
if base_clean.lower() == result_clean.lower():
column_mapping[gt_col] = result_col
matched = True
break
print("Column mapping:", column_mapping)
# 3. Faster, vectorized computation using the corrected mapping
data_list = []
for gt_col, result_col in column_mapping.items():
print(f"Processing {gt_col} vs {result_col}")
# Convert to numeric, forcing errors to NaN
s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)
# Calculate matches (abs difference <= 0.5)
diff = np.abs(s1 - s2)
matches = (diff <= 0.5).sum()
# Determine the denominator (total valid comparisons)
valid_count = diff.notna().sum()
if valid_count > 0:
percentage = (matches / valid_count) * 100
else:
percentage = 0
# Extract clean base name for display
base_name = gt_col.replace('GT.', '')
data_list.append({
'GT': base_name,
'Match %': round(percentage, 1)
})
# 4. Prepare Data for Plotting
match_df = pd.DataFrame(data_list)
match_df = match_df.sort_values('Match %', ascending=False) # Sort for better visual flow
# 5. Create the Styled Gradient Table
def style_agreement_table(df):
return (df.style
.format({'Match %': '{:.1f}%'}) # Add % sign
.background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient
.set_properties(**{
'text-align': 'center',
'font-size': '12pt',
'border-collapse': 'collapse',
'border': '1px solid #D3D3D3'
})
.set_table_styles([
# Style the header
{'selector': 'th', 'props': [
('background-color', '#404040'),
('color', 'white'),
('font-weight', 'bold'),
('text-transform', 'uppercase'),
('padding', '10px')
]},
# Add hover effect
{'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]}
])
.set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)")
)
# To display in a Jupyter Notebook:
styled_table = style_agreement_table(match_df)
styled_table
dfi.export(styled_table, "styled_table.png")
#styled_table.to_html("agreement_report.html")
# 6. Save as SVG
#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
#print("Successfully saved agreement_table.svg")
# Show plot if running in a GUI environment
plt.show()
-74
View File
@@ -1,74 +0,0 @@
import pandas as pd
import numpy as np
import seaborn as sns
# Sample data (replace with your actual df)
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')
# Identify GT and Result columns
gt_columns = [col for col in df.columns if col.startswith('GT.')]
result_columns = [col for col in df.columns if col.startswith('result.')]
# Create mapping
column_mapping = {}
for gt_col in gt_columns:
base_name = gt_col.replace('GT.', '')
result_col = f'result.{base_name}'
if result_col in result_columns:
column_mapping[gt_col] = result_col
# Function to compute match percentage for each GT-Result pair
def compute_match_percentages(df, column_mapping):
    """Compute the agreement rate for each ground-truth/result column pair.

    A row counts as a match when both values are numeric and differ by at
    most 0.5. Rows where either value is missing or non-numeric are excluded
    from both the numerator and the denominator, so the percentage is taken
    over comparable pairs only. (Previously such rows were skipped as matches
    but still counted in the total, which deflated the rate.)

    Args:
        df: DataFrame containing every column named in ``column_mapping``.
        column_mapping: dict mapping a ground-truth column name to the
            result column it is compared against.

    Returns:
        DataFrame with one row per pair and the columns ``GT_Column``,
        ``Result_Column`` and ``Match_Percentage`` (rounded to one decimal).
        A pair without any comparable row gets 0.0 instead of raising
        ZeroDivisionError. An empty mapping yields an empty frame that still
        has the three columns.

    Raises:
        KeyError: if a mapped column is missing from ``df``.
    """
    rows = []
    for gt_col, result_col in column_mapping.items():
        # Coerce to float; anything unparsable becomes NaN and drops out below.
        gt_values = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
        result_values = pd.to_numeric(df[result_col], errors='coerce').astype(float)

        diff = (gt_values - result_values).abs()
        valid_count = int(diff.notna().sum())
        # NaN <= 0.5 evaluates to False, so only comparable rows can match.
        match_count = int((diff <= 0.5).sum())

        percentage = (match_count / valid_count) * 100 if valid_count else 0.0
        rows.append({
            'GT_Column': gt_col,
            'Result_Column': result_col,
            'Match_Percentage': round(percentage, 1),
        })

    return pd.DataFrame(
        rows,
        columns=['GT_Column', 'Result_Column', 'Match_Percentage'],
    )
# Compute match percentages
match_df = compute_match_percentages(df, column_mapping)
# Create a pivot table for gradient display (optional but helpful)
pivot_table = match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage'].unstack(fill_value=0)
# Apply gradient background
cm = sns.light_palette("green", as_cmap=True)
styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None)
# Display result
print("Agreement Percentage Table (with gradient):")
styled_table
# Save the styled table to a file
styled_table.to_html("agreement_report.html")
print("Report saved to agreement_report.html")
+31
View File
@@ -0,0 +1,31 @@
# Project Structure
This project was reorganized into:
- `data/`
- `raw/`: original raw data, if retained locally
- `processed/`: cleaned or derived input data
- `ground_truth/`: manually annotated reference data
- `external/`: externally provided data
- `prompts/`
- EDSS instructions and prompt/schema assets
- `scripts/`
- runnable analysis and plotting scripts
- `results/`
- `benchmark_runs/`: full model benchmark runs
- `final_results/`: final selected model outputs
- `figures/`: generated figures
- `tables/`: generated tables
- `logs/`: terminal logs
- `manuscript/`
- final figures and tables for paper/thesis writing
- `archive/`
- old scripts, old results, temporary files, and unclear legacy files
Important:
The reorganization was performed after creating a full timestamped backup.
-3
View File
@@ -216,6 +216,3 @@ if __name__ == "__main__":
# %% name
eXXXXXXXX
##
+481
View File
@@ -0,0 +1,481 @@
1 VISUAL OPTIC FUNCTIONS
VISUAL ACUITY
The visual acuity score is based on the line in the Snellen chart at 20 feet (5 meters)
for which the patient makes no more than one error, using best available correction.
Alternatively, best corrected near vision can be assessed, but this should be noted and
consistently performed during follow-up examinations. Switching from near to distance
visual acuity measurements should be avoided in follow-up examinations.
VISUAL FIELDS
0 normal
1 signs only: deficits present only on formal (confrontational) testing
2 moderate: patient aware of deficit, but incomplete hemianopsia on examination
3 marked: complete homonymous hemianopsia or equivalent
SCOTOMA
0 none
1 small: detectable only on formal (confrontational) testing
2 large: spontaneously reported by patient
* DISC PALLOR
0 not present
1 present
NOTE
When determining the EDSS step, the Visual FS score must be converted to a lower
score as follows:
Visual FS Score: 6 5 4 3 2 1
Converted Visual FS Score: 4 3 3 2 2 1
FUNCTIONAL SYSTEM SCORE
0 normal
1 disc pallor and/or small scotoma and/or visual acuity (corrected) of worse eye less than 20/20 (1.0) but better than 20/30 (0.67)
2 worse eye with maximal visual acuity (corrected) of 20/30 to 20/59 (0.67-0.34)
3 worse eye with large scotoma and/or moderate decrease in fields and/or maximal visual acuity (corrected) of 20/60 to 20/99 (0.33-0.21)
4 worse eye with marked decrease of fields and/or maximal visual acuity (corrected) of 20/100 to 20/200 (0.2-0.1); grade 3 plus maximal acuity of better eye of 20/60 (0.33) or less
5 worse eye with maximal visual acuity (corrected) less than 20/200 (0.1); grade 4 plus maximal acuity of better eye of 20/60 (0.33) or less
6 grade 5 plus maximal acuity of better eye of 20/60 (0.33) or less
* = optional part of the examination
### BRAINSTEM FUNCTIONS
**DYSARTHRIA**
- **0**: None
- **1**: Signs only
- **2**: Mild: Clinically detectable, patient is aware
- **3**: Moderate: Obvious during conversation, impairs comprehension
- **4**: Marked: Incomprehensible speech
- **5**: Inability to speak
**DYSPHAGIA**
- **0**: None
- **1**: Signs only
- **2**: Mild: Difficulty with thin liquids
- **3**: Moderate: Difficulty with liquids and solid food
- **4**: Marked: Sustained difficulty, requires pureed diet
- **5**: Inability to swallow
**OTHER CRANIAL NERVE FUNCTIONS**
- **0**: Normal
- **1**: Signs only
- **2**: Mild disability: Clinically detectable deficit, patient is usually aware
- **3**: Moderate disability
- **4**: Marked disability
**EXTRAOCULAR MOVEMENTS (EOM) IMPAIRMENT**
- **0**: None
- **1**: Signs only: Subtle EOM weakness, no complaints of vision issues
- **2**: Mild: Subtle EOM weakness or obvious incomplete paralysis not noticed by patient
- **3**: Moderate: Obvious incomplete paralysis noticed by patient or complete loss in one direction
- **4**: Marked: Complete loss in more than one direction
**NYSTAGMUS**
- **0**: None
- **1**: Signs only or mild: Gaze-evoked nystagmus below moderate limits (equivalent to Brainstem FS score of 1)
- **2**: Moderate: Sustained nystagmus on horizontal/vertical gaze at 30 degrees, patient may not notice
- **3**: Severe: Nystagmus in primary position or coarse persistent nystagmus interfering with vision; complete internuclear ophthalmoplegia; oscillopsia
**TRIGEMINAL DAMAGE**
- **0**: None
- **1**: Signs only
- **2**: Mild: Clinically detectable numbness, patient is aware
- **3**: Moderate: Impaired sharp/dull discrimination in one to three branches or trigeminal neuralgia (at least one recent attack)
- **4**: Marked: Unable to discriminate between sharp/dull or complete loss of sensation in one or both nerves
**FACIAL WEAKNESS**
- **0**: None
- **1**: Signs only
- **2**: Mild: Clinically detectable weakness, patient is aware
- **3**: Moderate: Incomplete facial palsy (e.g., eye closure requires patching, drooling)
- **4**: Marked: Complete unilateral or bilateral facial palsy with lagophthalmus or difficulty with liquids
**HEARING LOSS**
- **0**: None
- **1**: Signs only: Hears finger rub less on one/both sides, lateralized Weber test but no complaints
- **2**: Mild: As in 1, aware of hearing problem
- **3**: Moderate: Does not hear finger rub on one/both sides, misses several whispered numbers
- **4**: Marked: Misses all or nearly all whispered numbers
**FUNCTIONAL SYSTEM SCORE**
- **0**: Normal
- **1**: Signs only
- **2**: Moderate nystagmus/EOM impairment/other mild disability
- **3**: Severe nystagmus/marked EOM impairment/moderate other cranial nerve disability
- **4**: Marked dysarthria/other marked disability
- **5**: Inability to swallow or speak
### PYRAMIDAL FUNCTIONS
#### REFLEXES
- **0**: Absent
- **1**: Diminished
- **2**: Normal
- **3**: Exaggerated
- **4**: Nonsustained clonus (a few beats of clonus)
- **5**: Sustained clonus
##### Cutaneous Reflexes
- **0**: Normal
- **1**: Weak
- **2**: Absent
###### Palmomental Reflex
- **0**: Absent
- **1**: Present
###### Plantar Response
- **0**: Flexor
- **1**: Neutral or equivocal
- **2**: Extensor
#### LIMB STRENGTH
The weakest muscle in each group defines the score for that muscle group. Optional functional tests (hopping on one foot and walking on heels/toes) are recommended for BMRC grades 3-5.
##### BMRC Rating Scale
- **0**: No muscle contraction detected
- **1**: Visible contraction without visible joint movement
- **2**: Visible movement only on the plane of gravity
- **3**: Active movement against gravity, but not against resistance
- **4**: Active movement against resistance, but not full strength
- **5**: Normal strength
#### FUNCTIONAL TESTS
##### Pronator Drift (Upper Extremities)
Pronation and downward drift:
- **0**: None
- **1**: Mild
- **2**: Evident
##### Position Test (Lower Extremities)
Ask patient to lift both legs together, with legs fully extended at the knee. Sinking:
- **0**: None
- **1**: Mild
- **2**: Evident
- **3**: Able to lift only one leg at a time (grade from the horizontal position at the hip joints in degrees)
- **4**: Unable to lift one leg at a time
##### Walking on Heels/Toes
- **0**: Normal
- **1**: Impaired
- **2**: Not possible
##### Hopping on One Foot
- **0**: Normal
- **1**: 6-10 times
- **2**: 1-5 times
- **3**: Not possible
#### LIMB SPASTICITY (AFTER RAPID FLEXION OF THE EXTREMITY)
- **0**: None
- **1**: Mild: barely increased muscle tone
- **2**: Moderate: moderately increased muscle tone that can be overcome; full range of motion is possible
- **3**: Severe: severely increased muscle tone that is extremely difficult to overcome; full range of motion is not possible
- **4**: Contracted
#### GAIT SPASTICITY
- **0**: None
- **1**: Barely perceptible
- **2**: Evident: minor interference with function
- **3**: Permanent shuffling: major interference with function
#### OVERALL MOTOR PERFORMANCE
- **0**: Normal
- **1**: Abnormal weakness (as compared to peers) in performing more demanding tasks, e.g., walking longer distances; no reduction in limb strength on formal testing
- **2**: Reduction in strength of individual muscle groups at confrontational testing
#### FUNCTIONAL SYSTEM SCORE
- **0**: Normal
- **1**: Abnormal signs without disability
- **2**: Minimal disability: patient complains of motor-fatigability or reduced performance in strenuous motor tasks (motor performance grade 1) and/or BMRC grade 4 in one or two muscle groups
- **3**: Mild to moderate paraparesis or hemiparesis: usually BMRC grade 4 in more than two muscle groups; and/or BMRC grade 3 in one or two muscle groups (movements against gravity
are possible); and/or severe monoparesis: BMRC grade 2 or less in one muscle group
- **4**: Marked paraparesis or hemiparesis: usually BMRC grade 2 in two limbs or monoplegia with BMRC grade 0 or 1 in one limb; and/or moderate tetraparesis: BMRC grade 3 in three or more limbs
- **5**: Paraplegia: BMRC grade 0 or 1 in all muscle groups of the lower limbs; and/or marked tetraparesis: BMRC grade 2 or less in three or more limbs; and/or hemiplegia
- **6**: Tetraplegia: BMRC grade 0 or 1 in all muscle groups of the upper and lower limbs
### CEREBELLAR FUNCTIONS
#### HEAD TREMOR
- **0**: none
- **1**: mild
- **2**: moderate
- **3**: severe
#### TRUNCAL ATAXIA
- **0**: none
- **1**: signs only
- **2**: mild (swaying with eyes closed)
- **3**: moderate (swaying with eyes open)
- **4**: severe (unable to sit without assistance)
#### LIMB ATAXIA (TREMOR / DYSMETRIA AND RAPID ALTERNATING MOVEMENTS)
- **0**: none
- **1**: signs only
- **2**: mild (tremor or clumsy movements easily seen, minor interference with function)
- **3**: moderate (tremor or clumsy movements interfere with function in all spheres)
- **4**: severe (most functions are very difficult)
#### TANDEM (STRAIGHT LINE) WALKING
- **0**: normal
- **1**: impaired
- **2**: not possible
#### GAIT ATAXIA
- **0**: none
- **1**: signs only
- **2**: mild (problems with balance realized by patient and/or significant other)
- **3**: moderate (abnormal balance with ordinary walking)
- **4**: severe (unable to walk more than a few steps unassisted or requires a walking aid or assistance due to ataxia)
#### ROMBERG TEST
- **0**: normal
- **1**: mild (mild instability with eyes closed)
- **2**: moderate (not stable with eyes closed)
- **3**: severe (not stable with eyes open)
#### OTHER CEREBELLAR TESTS
- **0**: normal
- **1**: mild abnormality
- **2**: moderate abnormality
- **3**: severe abnormality
**NOTE:**
- The presence of severe gait and/or truncal ataxia alone (without severe ataxia in three or four limbs) results in a Cerebellar FS score of 3.
- If weakness or sensory deficits interfere with the testing of ataxia, score the patient's actual performance. Indicate the possible role of weakness by marking an "X" after the
affected subsystems and Cerebellar FS score.
#### FUNCTIONAL SYSTEM SCORE
- **0**: normal
- **1**: abnormal signs without disability
- **2**: mild ataxia and/or moderate station ataxia (Romberg) and/or tandem walking not possible
- **3**: moderate limb ataxia and/or moderate or severe gait/truncal ataxia
- **4**: severe gait/truncal ataxia and severe ataxia in three or four limbs
- **5**: unable to perform coordinated movements due to ataxia
- **X**: pyramidal weakness (BMRC grade 3 or worse in limb strength) or sensory deficits interfere with cerebellar testing
### SENSORY FUNCTIONS
#### SUPERFICIAL SENSATION (LIGHT TOUCH AND PAIN)
- **0**: normal
- **1**: signs only (slightly diminished sensation on formal testing, patient not aware)
- **2**: mild (patient aware of impaired light touch or pain but can discriminate sharp/dull)
- **3**: moderate (impaired discrimination of sharp/dull)
- **4**: marked (unable to discriminate between sharp/dull and/or unable to feel light touch)
- **5**: complete loss (anesthesia)
#### VIBRATION SENSE (AT THE MOST DISTAL JOINT)
- **0**: normal
- **1**: mild (graded tuning fork 5-7 of 8; detects more than 10 seconds but less than examiner)
- **2**: moderate (graded tuning fork 1-4 of 8; detects between 2 and 10 sec.)
- **3**: marked (complete loss of vibration sense)
#### POSITION SENSE
- **0**: normal
- **1**: mild (1-2 incorrect responses, only distal joints affected)
- **2**: moderate (misses many movements of fingers or toes; proximal joints affected)
- **3**: marked (no perception of movement, astasia)
* **LHERMITTE'S SIGN** (does not contribute to the Sensory FS score)
- **0**: negative
- **1**: positive
* **PARAESTHESIAE (TINGLING)** (does not contribute to the Sensory FS score)
- **0**: none
- **1**: present
#### FUNCTIONAL SYSTEM SCORE
- **0**: normal
- **1**: impaired superficial sensation in one or two limbs
- **2**: mild impairment in more than two limbs, no major proprioceptive deficits
- **3**: moderate impairment in more than two limbs with minor proprioceptive deficits
- **4**: severe impairment in more than two limbs with significant proprioceptive deficits
- **5**: loss of sensation in one or two limbs, significant proprioceptive deficits in most of the body below the head
- **6**: essentially no sensation below the head
### BOWEL AND BLADDER FUNCTIONS
#### URINARY HESITANCY AND RETENTION
- **0**: none
- **1**: mild (no major impact on lifestyle)
- **2**: moderate (urinary retention; frequent urinary tract infections)
- **3**: severe (requires catheterization)
- **4**: loss of function (overflow incontinence)
#### URINARY URGENCY AND INCONTINENCE
- **0**: none
- **1**: mild (no major impact on lifestyle)
- **2**: moderate (rare incontinence occurring no more than once a week; must wear pads)
- **3**: severe (frequent incontinence occurring from several times a week to more than once a day; must wear urinal or pads)
- **4**: loss of function (loss of bladder control)
#### BLADDER CATHETERIZATION
- **0**: none
- **1**: intermittent self-catheterization
- **2**: constant catheterization
#### BOWEL DYSFUNCTION
- **0**: none
- **1**: mild (no incontinence, no major impact on lifestyle, mild constipation)
- **2**: moderate (must wear pads or alter lifestyle to be near lavatory)
- **3**: severe (in need of enemas or manual measures to evacuate bowels)
- **4**: complete loss of function
#### SEXUAL DYSFUNCTION
**Male**
- **0**: none
- **1**: mild (difficulty maintaining erection during intercourse, but achieves erection and still has intercourse)
- **2**: moderate (difficulty achieving erection, decreased libido, still has intercourse and reaches orgasm)
- **3**: severe (marked decrease in libido, inability to achieve full erection, intercourse with difficulty, hypoorgasmia)
- **4**: loss of function
**Female**
- **0**: none
- **1**: mild (mild lack of lubrication, still sexually active and reaches orgasm)
- **2**: moderate (dyspareunia, hypoorgasmia, decrease in sexual activity)
- **3**: severe (marked decrease in sexual activity, anorgasmia)
- **4**: loss of function
**NOTE**
When determining the EDSS step, the Bowel and Bladder FS score must be converted to a lower score as follows:
- Bowel and Bladder FS Score: 6 → Converted Bowel and Bladder FS Score: 5
- Bowel and Bladder FS Score: 5 → Converted Bowel and Bladder FS Score: 4
- Bowel and Bladder FS Score: 4 → Converted Bowel and Bladder FS Score: 3
- Bowel and Bladder FS Score: 3 → Converted Bowel and Bladder FS Score: 3
- Bowel and Bladder FS Score: 2 → Converted Bowel and Bladder FS Score: 2
- Bowel and Bladder FS Score: 1 → Converted Bowel and Bladder FS Score: 1
Sexual dysfunction can be documented but generally does not impact the FS score due to assessment difficulties by examining physicians.
### FUNCTIONAL SYSTEM SCORE
- **0**: normal
- **1**: mild urinary hesitancy, urgency, and/or constipation
- **2**: moderate urinary hesitancy/retention and/or moderate urinary urgency/incontinence and/or moderate bowel dysfunction
- **3**: frequent urinary incontinence or intermittent self-catheterization; needs enemas or manual measures to evacuate bowels
- **4**: in need of almost constant catheterization
- **5**: loss of bladder or bowel function (external or indwelling catheter)
- **6**: loss of bowel and bladder function
### CEREBRAL FUNCTIONS
#### DEPRESSION AND EUPHORIA
- **0**: none
- **1**: present (Patient complains of depression or is considered depressed or euphoric by the investigator or significant other.)
**Note**: Depression and Euphoria are documented on the scoring sheet but are not taken into consideration for FS and EDSS calculation.
#### DECREASE IN MENTATION
- **0**: none
- **1**: signs only (not apparent to patient and/or significant other)
- **2**: mild (Patient and/or significant other report mild changes in mentation. Examples include: impaired ability to follow a rapid course of association or survey complex matters;
impaired judgment in certain demanding situations; capable of handling routine daily activities, but unable to tolerate additional stressors; intermittently symptomatic even with
normal levels of stress; reduced performance; tendency toward negligence due to obliviousness or fatigue.)
- **3**: moderate (Definite abnormalities on brief mental status testing, but still oriented to person, place, and time)
- **4**: marked (Not oriented in one or two spheres (person, place, or time); marked effect on lifestyle)
- **5**: dementia, confusion, and/or complete disorientation
#### FATIGUE
- **0**: none
- **1**: mild (Does not usually interfere with daily activities)
- **2**: moderate (Interferes but does not limit daily activities for more than 50%)
- **3**: severe (Significant limitation in daily activities (> 50% reduction))
**Note**: Because fatigue is difficult to evaluate objectively, in some studies it does not contribute to the Cerebral FS score or EDSS step. Please adhere to the study's specific
instructions.
### FUNCTIONAL SYSTEM SCORE
- **0**: normal
- **1**: signs only in decrease in mentation; mild fatigue
- **2**: mild decrease in mentation; moderate or severe fatigue
- **3**: moderate decrease in mentation
- **4**: marked decrease in mentation
- **5**: dementia
### AMBULATION
**Unrestricted Ambulation**
- The patient can walk a normal distance without assistance, comparable to healthy individuals of similar age and physical condition.
- EDSS step can range from 0 to 5.0, depending on the Functional System (FS) scores.
**Fully Ambulatory**
- At least 500 meters of ambulation without assistance, but not unrestricted.
- EDSS step can range from 2.0 to 5.0, depending on FS scores.
- The Pyramidal and/or Cerebellar FS must be ≥ 2 to reflect this restriction in ambulation.
**Ambulation < 500 Meters**
- If the walking distance is less than 500 meters, the EDSS step must be ≥ 4.5, depending on the walking ranges provided by the ambulation score and combination of FS scores.
- EDSS steps 5.5 to 8.0 are exclusively defined by the ability to ambulate and type of assistance required, or the ability to use a wheelchair.
**Assistance Needed**
- Definitions for EDSS steps 6.0 or 6.5 include both the type of assistance required when walking and the walking range.
- Assistance by another person is equivalent to bilateral assistance.
**Note:**
- The ambulation score represents both the walking range and the type of assistance required.
- This score replaces several checkboxes used previously on the scoring sheet but does not introduce new definitions.
- Use of a wheelchair can now be scored on the scoring sheet.
- Indicate the reported distance and time for the patient in the appropriate field on the scoring sheet, followed by the type of assistance and walking distance measured during assessment.
### DISTANCE AND TIME REPORTED BY PATIENT
**Maximal Unassisted Walking Distance**
- Maximal unassisted walking distance reported by the patient (in meters) without rest or assistance.
- Time required to walk the maximum distance according to the patient (in minutes).
**Assistance**
0. Without help or assistance (allowing use of an ankle-foot orthotic device, but no other assistive devices).
1. Unilateral assistance: one stick/crutch/brace.
2. Bilateral assistance: two sticks/crutches/braces or assistance by another person.
3. Wheelchair.
**Distance**
- Measure the distance the patient can walk in meters.
- **Unassisted:** Observe walking for a minimum of 500 meters and measure time needed, if possible.
- **Assisted:** Observe walking with assistive devices or help from another person for a minimum of 130 meters, if possible.
---
### AMBULATION SCORE
0. Unrestricted
1. Fully ambulatory
2. ≥ 300 meters but < 500 meters, without help or assistance (EDSS 4.5 or 5.0)
3. ≥ 200 meters but < 300 meters, without help or assistance (EDSS 5.0)
4. ≥ 100 meters but < 200 meters, without help or assistance (EDSS 5.5)
5. Walking range < 100 meters without assistance (EDSS 6.0)
6. Unilateral assistance, ≥ 50 meters (EDSS 6.0)
7. Bilateral assistance, ≥ 120 meters (EDSS 6.0)
8. Unilateral assistance, < 50 meters (EDSS 6.5)
9. Bilateral assistance, ≥ 5 meters but < 120 meters (EDSS 6.5)
10. Uses wheelchair without help; unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair for some 12 hours a day (EDSS 7.0)
11. Uses wheelchair with help; unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and wheeling self (EDSS 7.5)
12. Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms (EDSS 8.0)
Expanded Disability Status Scale (EDSS)
0 - Normal neurological exam (all Functional Systems [FS] grade 0)
1.0 - No disability, minimal signs in one FS (one FS grade 1)
1.5 - No disability, minimal signs in more than one FS (more than one FS grade 1)
2.0 - Minimal disability in one FS (one FS grade 2, others 0 or 1)
2.5 - Minimal disability in two FS (two FS grades 2, others 0 or 1)
3.0 - Moderate disability in one FS (one FS grade 3, others 0 or 1) though fully ambulatory;
or mild disability in three or four FS (three/four FS grades 2, others 0 or 1) though fully ambulatory
3.5 - Fully ambulatory but with moderate disability in one FS (one FS grade 3) and mild disability in one or two FS (one/two FS grade 2) and others 0 or 1;
or fully ambulatory with two FS grades 3 (others 0 or 1);
or fully ambulatory with five FS grades 2 (others 0 or 1)
4.0 - Unable to walk > 25 feet without aid
4.5 - Unable to walk > 100 feet without aid
5.0 - Relies on a walking aid; unable to walk > 300 feet without resting
5.5 - Relies on a walking aid; unable to walk > 200 feet without resting
6.0 - Unable to walk more than 50 feet with or without aid; cannot stand unaided for five minutes
6.5 - Unable to walk more than 10 feet with or without aid; cannot stand unaided for two minutes
7.0 - Unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair some 12 hours a day
7.5 - Unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and in wheeling self
8.0 - Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms
8.5 - Essentially restricted to bed much of the day; has some effective use of arm(s); retains some self-care functions
9.0 - Helpless bed patient; can communicate and eat
9.5 - Totally helpless bed patient; unable to communicate effectively or eat/swallow
10 - Death due to MS
+11
View File
@@ -0,0 +1,11 @@
# Grammar describing a single JSON object. `root` accepts the keys in a fixed
# order: "Reason" (a string of at most 400 characters), then
# "nicht_klassifizierbar" (a boolean; German for "not classifiable"), then an
# optional "EDSS" (a number).
# NOTE(review): `#` comments follow llama.cpp GBNF syntax - confirm that the
# parser consuming this file accepts them.
EDSS-kv ::= "\"EDSS\"" space ":" space number
Reason ::= "\"" char{0,400} "\"" space
Reason-kv ::= "\"Reason\"" space ":" space Reason
boolean ::= ("true" | "false") space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
nicht-klassifizierbar-kv ::= "\"nicht_klassifizierbar\"" space ":" space boolean
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space Reason-kv "," space nicht-klassifizierbar-kv ( "," space ( EDSS-kv ) )? "}" space
space ::= | " " | "\n"{1,2} [ \t]{0,20}
+25
View File
@@ -0,0 +1,25 @@
Expanded Disability Status Scale (EDSS)
0 - Normal neurological exam (all Functional Systems [FS] grade 0)
1.0 - No disability, minimal signs in one FS (one FS grade 1)
1.5 - No disability, minimal signs in more than one FS (more than one FS grade 1)
2.0 - Minimal disability in one FS (one FS grade 2, others 0 or 1)
2.5 - Minimal disability in two FS (two FS grades 2, others 0 or 1)
3.0 - Moderate disability in one FS (one FS grade 3, others 0 or 1) though fully ambulatory;
or mild disability in three or four FS (three/four FS grades 2, others 0 or 1) though fully ambulatory
3.5 - Fully ambulatory but with moderate disability in one FS (one FS grade 3) and mild disability in one or two FS (one/two FS grade 2) and others 0 or 1;
or fully ambulatory with two FS grades 3 (others 0 or 1);
or fully ambulatory with five FS grades 2 (others 0 or 1)
4.0 - Unable to walk > 25 feet without aid
4.5 - Unable to walk > 100 feet without aid
5.0 - Relies on a walking aid; unable to walk > 300 feet without resting
5.5 - Relies on a walking aid; unable to walk > 200 feet without resting
6.0 - Unable to walk more than 50 feet with or without aid; cannot stand unaided for five minutes
6.5 - Unable to walk more than 10 feet with or without aid; cannot stand unaided for two minutes
7.0 - Unable to walk 5 meters even with aid, essentially restricted to wheelchair; wheels self and transfers alone; up and about in wheelchair some 12 hours a day
7.5 - Unable to take more than a few steps; restricted to wheelchair; may need some help in transferring and in wheeling self
8.0 - Essentially restricted to bed or chair or perambulated in wheelchair, but out of bed most of the day; retains many self-care functions; generally has effective use of arms
8.5 - Essentially restricted to bed much of the day; has some effective use of arm(s); retains some self-care functions
9.0 - Helpless bed patient; can communicate and eat
9.5 - Totally helpless bed patient; unable to communicate effectively or eat/swallow
10 - Death due to MS
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
File diff suppressed because it is too large Load Diff
+117
View File
@@ -263,3 +263,120 @@ plt.legend(frameon=False, loc='upper center', bbox_to_anchor=(0.5, -0.05))
plt.tight_layout()
plt.show()
##
# %% name
import matplotlib.pyplot as plt

# Number of patients for each visit count, listed from 9 visits down to 1.
data = {
    'Visit': [9, 8, 7, 6, 5, 4, 3, 2, 1],
    'patient_count': [2, 3, 3, 6, 13, 17, 28, 24, 32]
}

# Order the pairs by visit count so the bars run 1..9 from left to right.
# Sorting on the visit value only keeps the original (stable) tie order.
ordered_pairs = sorted(
    zip(data['Visit'], data['patient_count']),
    key=lambda pair: pair[0],
)
visit_sorted = [visit for visit, _ in ordered_pairs]
count_sorted = [count for _, count in ordered_pairs]

# Draw the chart once, already in sorted order.
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(visit_sorted, count_sorted, color='darkblue', label='Patients by Visit Count')

ax.set_xlabel('Number of Visits', fontsize=12)
ax.set_ylabel('Number of Unique Patients', fontsize=12)
#ax.set_title('Number of Patients by Visit Number', fontsize=14)

ax.legend()

# Light horizontal grid, and one tick per integer visit count.
ax.grid(axis='y', linestyle='--', alpha=0.7)
ax.set_xticks(visit_sorted)

plt.tight_layout()
plt.show()
##
# %% Patientjourney Bubble chart
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl

mpl.rcParams["font.family"] = "DejaVu Sans"  # or "Arial", "Calibri", "Times New Roman", ...
mpl.rcParams["font.size"] = 12  # default size for text
mpl.rcParams["axes.titlesize"] = 14
mpl.rcParams["axes.titleweight"] = "bold"

# Number of patients whose record contains exactly 1, 2, ..., 9 visits.
visits = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
patient_count = np.array([32, 24, 28, 17, 13, 6, 3, 3, 2])

# "Remaining" = patients with at least that many visits, i.e. the suffix sum
# of patient_count (cumulative sum taken from the right).
remaining = patient_count[::-1].cumsum()[::-1]

fig, ax = plt.subplots(figsize=(12, 3))
y = 0.0  # all bubbles sit on one horizontal line

# Baseline connecting the bubbles.
ax.hlines(y, visits.min() - 0.4, visits.max() + 0.4, color="#1f77b4", linewidth=3)

# Matplotlib scatter sizes are areas in points^2; tweak the multiplier to
# make the bubbles bigger or smaller.
sizes = patient_count * 35
ax.scatter(visits, np.full_like(visits, y), s=sizes, color="#1f77b4", zorder=3)

#ax.set_title("Patient Journey by Visit Count", fontsize=14, pad=18)

# Labels above each bubble: "1 visit", "2 visits", ...
for x in visits:
    label = f"{x} visit" if x == 1 else f"{x} visits"
    ax.text(x, y + 0.18, label, ha="center", va="bottom", fontsize=10)

# Labels below each bubble: patients at this count and patients remaining.
for x, pc, rem in zip(visits, patient_count, remaining):
    ax.text(x, y - 0.20, f"{pc} patients", ha="center", va="top", fontsize=9)
    ax.text(x, y - 0.32, f"{rem} remaining", ha="center", va="top", fontsize=9)

# Hide ticks and spines; the limits leave room for the text labels.
ax.set_xlim(visits.min() - 0.6, visits.max() + 0.6)
ax.set_ylim(-0.5, 0.35)
ax.set_xticks([])
ax.set_yticks([])
for spine in ax.spines.values():
    spine.set_visible(False)

plt.tight_layout()
# Save before showing: once plt.show() returns the figure may already be
# closed, and a later plt.savefig() would write an empty canvas.
fig.savefig("patient_journey.svg", format="svg", bbox_inches="tight")
plt.show()
##
+12966
View File
File diff suppressed because it is too large Load Diff