Adjusting and cleaning
This commit is contained in:
@@ -1828,6 +1828,97 @@ plt.tight_layout()
|
|||||||
plt.show()
|
plt.show()
|
||||||
##
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% name
# Diverging stacked bar chart of scoring errors per functional system:
# under-scoring (result < ground truth) extends left of zero, over-scoring
# extends right, and each side is split by error magnitude (1 vs. 2 or more).
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
from matplotlib.ticker import FuncFormatter

# --- Configuration & Theme ---
plt.rcParams['font.family'] = 'Arial'
# NOTE(review): this path is defined but the figure is never written to it in
# this cell -- confirm whether a savefig call is intended.
figure_save_path = 'project/visuals/functional_systems_magnitude_focus.svg'

# --- 1. Process Error Data with Magnitude Breakdown ---
# `functional_systems_to_plot` (pairs of ground-truth / result column names),
# `df` and `safe_parse` are expected to be defined by earlier cells.
system_names = [name.split('.')[1] for name, _ in functional_systems_to_plot]
plot_list = []

for gt_col, res_col in functional_systems_to_plot:
    sys_name = gt_col.split('.')[1]

    # Robust parsing
    gt = df[gt_col].apply(safe_parse)
    res = df[res_col].apply(safe_parse)
    error = res - gt

    # Granular counts (comparisons against NaN are False, so rows with a
    # missing value on either side fall into none of the buckets)
    matches = (error == 0).sum()
    u_1 = (error == -1).sum()
    u_2plus = (error <= -2).sum()
    o_1 = (error == 1).sum()
    o_2plus = (error >= 2).sum()

    # Series.count() already ignores NaN; guard against division by zero
    # when a system has no comparable rows at all.
    total = error.count()
    divisor = max(total, 1)

    plot_list.append({
        'System': sys_name.replace('_', ' ').title(),
        'Matches': matches, 'MatchPct': (matches / divisor) * 100,
        'U1': u_1, 'U2': u_2plus, 'UnderTotal': u_1 + u_2plus,
        'UnderPct': ((u_1 + u_2plus) / divisor) * 100,
        'O1': o_1, 'O2': o_2plus, 'OverTotal': o_1 + o_2plus,
        'OverPct': ((o_1 + o_2plus) / divisor) * 100
    })

stats_df = pd.DataFrame(plot_list)

# --- 2. Plotting ---
fig, ax = plt.subplots(figsize=(13, 8))

# Define Magnitude Colors
c_under_dark, c_under_light = '#C0392B', '#E74C3C'  # Dark Red (-2+), Soft Red (-1)
c_over_dark, c_over_light = '#2980B9', '#3498DB'    # Dark Blue (+2+), Soft Blue (+1)
bar_height = 0.6
y_pos = np.arange(len(stats_df))

# Plot Under-scored (Stacked: -2+ then -1)
ax.barh(y_pos, -stats_df['U2'], bar_height, color=c_under_dark, label='Under -2+', edgecolor='white')
ax.barh(y_pos, -stats_df['U1'], bar_height, left=-stats_df['U2'], color=c_under_light, label='Under -1', edgecolor='white')

# Plot Over-scored (Stacked: +1 then +2+)
ax.barh(y_pos, stats_df['O1'], bar_height, color=c_over_light, label='Over +1', edgecolor='white')
ax.barh(y_pos, stats_df['O2'], bar_height, left=stats_df['O1'], color=c_over_dark, label='Over +2+', edgecolor='white')

# --- 3. Aesthetics & Table Labels ---
# Read the left axis limit once so every label is anchored at the same x.
label_x = ax.get_xlim()[0] - 0.5

for i, row in stats_df.iterrows():
    # Mathtext ignores plain spaces, so multi-word system names would be
    # rendered run together; "\ " is the mathtext escape for a space.
    bold_name = row['System'].replace(' ', r'\ ')
    label_text = (
        f"$\\mathbf{{{bold_name}}}$\n"
        f"Match: {int(row['Matches'])} ({row['MatchPct']:.1f}%)\n"
        f"Under: {int(row['UnderTotal'])} ({row['UnderPct']:.1f}%) | Over: {int(row['OverTotal'])} ({row['OverPct']:.1f}%)"
    )
    # Position table text to the left
    ax.text(label_x, i, label_text, va='center', ha='right', fontsize=9, color='#333333', linespacing=1.4)

# Formatting
ax.axvline(0, color='black', linewidth=1.2)
ax.set_yticks([])
ax.set_xlabel('Number of Patients with Error', fontsize=11, fontweight='bold')
#ax.set_title('Directional Error Magnitude (Under vs. Over Scoring)', fontsize=14, pad=35)

# Absolute X-axis labels: a formatter keeps labels tied to the actual tick
# positions (fixed label lists go stale when ticks are recomputed) and does
# not truncate fractional ticks to a misleading integer.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: f"{abs(value):g}"))

# Remove spines and add grid
for spine in ['top', 'right', 'left']:
    ax.spines[spine].set_visible(False)
ax.xaxis.grid(True, linestyle='--', alpha=0.3)

# Legend with magnitude info
ax.legend(loc='upper right', frameon=False, bbox_to_anchor=(1, 1.1), ncol=2)

plt.tight_layout()
plt.show()
##
|
||||||
# %% test
|
# %% test
|
||||||
# Diagnose: what are the actual differences?
|
# Diagnose: what are the actual differences?
|
||||||
print("\n🔍 Raw differences (first 5 rows per system):")
|
print("\n🔍 Raw differences (first 5 rows per system):")
|
||||||
|
|||||||
135
Data/style2.py
135
Data/style2.py
@@ -1,135 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import seaborn as sns
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import dataframe_image as dfi
|
|
||||||
# Load data
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')

# 1. Identify all GT and result columns
gt_columns = [col for col in df.columns if col.startswith('GT.')]
result_columns = [col for col in df.columns if col.startswith('result.')]

print("GT Columns found:", gt_columns)
print("Result Columns found:", result_columns)

# 2. Create proper mapping between GT and result columns
# Handle various naming conventions (spaces, underscores, etc.)
column_mapping = {}

for gt_col in gt_columns:
    base_name = gt_col.replace('GT.', '')

    # Candidate result names, tried in order; the first one present wins.
    candidates = [
        f'result.{base_name}',                    # Exact match
        f'result.{base_name.replace(" ", "_")}',  # With underscores
        f'result.{base_name.replace("_", " ")}',  # With spaces
        f'result.{base_name.replace(" ", "")}',   # No spaces
        f'result.{base_name.replace("_", "")}',   # No underscores
        f'result.{base_name.lower()}',            # Lower-cased
        f'result.{base_name.upper()}',            # Upper-cased
    ]

    for candidate in candidates:
        if candidate in result_columns:
            column_mapping[gt_col] = candidate
            break
    else:
        # No candidate matched: compare case-insensitively after dropping
        # every character that is not alphanumeric, '_' or ' '.
        base_clean = ''.join(e for e in base_name if e.isalnum() or e in ['_', ' '])
        for result_col in result_columns:
            result_base = result_col.replace('result.', '')
            result_clean = ''.join(e for e in result_base if e.isalnum() or e in ['_', ' '])
            if base_clean.lower() == result_clean.lower():
                column_mapping[gt_col] = result_col
                break
        else:
            # Report the gap instead of silently leaving the column out of
            # the agreement table.
            print(f"No result column found for {gt_col}; skipping")

print("Column mapping:", column_mapping)

# 3. Faster, vectorized computation using the corrected mapping
data_list = []

for gt_col, result_col in column_mapping.items():
    print(f"Processing {gt_col} vs {result_col}")

    # Convert to numeric, forcing errors to NaN
    s1 = pd.to_numeric(df[gt_col], errors='coerce').astype(float)
    s2 = pd.to_numeric(df[result_col], errors='coerce').astype(float)

    # Calculate matches (abs difference <= 0.5); NaN differences compare
    # False and are therefore never counted as matches.
    diff = np.abs(s1 - s2)
    matches = (diff <= 0.5).sum()

    # Denominator: rows where both values were numeric
    valid_count = diff.notna().sum()
    percentage = (matches / valid_count) * 100 if valid_count > 0 else 0

    # Extract clean base name for display
    base_name = gt_col.replace('GT.', '')

    data_list.append({
        'GT': base_name,
        'Match %': round(percentage, 1)
    })

# 4. Prepare Data for Plotting
match_df = pd.DataFrame(data_list)
match_df = match_df.sort_values('Match %', ascending=False)  # Sort for better visual flow
|
|
||||||
|
|
||||||
# 5. Create the Styled Gradient Table
|
|
||||||
def style_agreement_table(df):
|
|
||||||
return (df.style
|
|
||||||
.format({'Match %': '{:.1f}%'}) # Add % sign
|
|
||||||
.background_gradient(cmap='RdYlGn', subset=['Match %'], vmin=50, vmax=100) # Red to Green gradient
|
|
||||||
.set_properties(**{
|
|
||||||
'text-align': 'center',
|
|
||||||
'font-size': '12pt',
|
|
||||||
'border-collapse': 'collapse',
|
|
||||||
'border': '1px solid #D3D3D3'
|
|
||||||
})
|
|
||||||
.set_table_styles([
|
|
||||||
# Style the header
|
|
||||||
{'selector': 'th', 'props': [
|
|
||||||
('background-color', '#404040'),
|
|
||||||
('color', 'white'),
|
|
||||||
('font-weight', 'bold'),
|
|
||||||
('text-transform', 'uppercase'),
|
|
||||||
('padding', '10px')
|
|
||||||
]},
|
|
||||||
# Add hover effect
|
|
||||||
{'selector': 'tr:hover', 'props': [('background-color', '#f5f5f5')]}
|
|
||||||
])
|
|
||||||
.set_caption("EDSS Agreement Analysis: Ground Truth vs. Results (Tolerance ±0.5)")
|
|
||||||
)
|
|
||||||
|
|
||||||
# Build the styled table; in a Jupyter notebook the bare expression renders
# it when it is the last statement of a cell.
styled_table = style_agreement_table(match_df)
styled_table

# Export the styled table as an image.
dfi.export(styled_table, "styled_table.png")
#styled_table.to_html("agreement_report.html")

# 6. Save as SVG
#plt.savefig("agreement_table.svg", format='svg', dpi=300, bbox_inches='tight')
#print("Successfully saved agreement_table.svg")

# Show plot if running in a GUI environment
plt.show()
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,74 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
import seaborn as sns
|
|
||||||
|
|
||||||
# Sample data (replace with your actual df)
df = pd.read_csv("/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Join_edssandsub.tsv", sep='\t')

# Split the columns into ground-truth and result groups by prefix
gt_columns = [name for name in df.columns if name.startswith('GT.')]
result_columns = [name for name in df.columns if name.startswith('result.')]

# Pair each GT column with the identically named result column, when present
expected_pairs = ((gt, f"result.{gt.replace('GT.', '')}") for gt in gt_columns)
column_mapping = {gt: res for gt, res in expected_pairs if res in result_columns}
|
|
||||||
|
|
||||||
# Function to compute match percentage for each GT-Result pair
|
|
||||||
def compute_match_percentages(df, column_mapping):
    """Compute the agreement percentage for each GT/result column pair.

    Two values agree when their absolute difference is at most 0.5. Rows where
    either value is missing or non-numeric are excluded from both the match
    count and the denominator, so missing data does not deflate the
    percentage. A pair with no comparable rows reports 0.0.

    Args:
        df: DataFrame holding the columns named in ``column_mapping``.
        column_mapping: Mapping of ground-truth column name to result column
            name.

    Returns:
        DataFrame with one row per pair and the columns ``GT_Column``,
        ``Result_Column`` and ``Match_Percentage`` (rounded to one decimal).
    """
    percentages = []
    for gt_col, result_col in column_mapping.items():
        # Non-numeric entries become NaN and drop out of the comparison.
        gt_values = pd.to_numeric(df[gt_col], errors='coerce')
        result_values = pd.to_numeric(df[result_col], errors='coerce')

        diff = (gt_values - result_values).abs()
        valid = int(diff.notna().sum())
        matched = int((diff <= 0.5).sum())  # NaN compares False

        # Guard the empty / all-missing case instead of dividing by zero.
        percentage = (matched / valid) * 100 if valid else 0.0

        percentages.append({
            'GT_Column': gt_col,
            'Result_Column': result_col,
            'Match_Percentage': round(percentage, 1)
        })

    return pd.DataFrame(percentages)
|
|
||||||
|
|
||||||
# Agreement percentage for every mapped GT/result pair
match_df = compute_match_percentages(df, column_mapping)

# Reshape into a GT x Result grid for gradient display (optional but helpful);
# combinations that were never compared are filled with 0
indexed_percentages = match_df.set_index(['GT_Column', 'Result_Column'])['Match_Percentage']
pivot_table = indexed_percentages.unstack(fill_value=0)

# Shade the whole grid on one shared green scale
cm = sns.light_palette("green", as_cmap=True)
styled_table = pivot_table.style.background_gradient(cmap=cm, axis=None)

# Display result
print("Agreement Percentage Table (with gradient):")
styled_table

# Save the styled table to a file
styled_table.to_html("agreement_report.html")
print("Report saved to agreement_report.html")
|
|
||||||
57
figure1.py
57
figure1.py
@@ -263,3 +263,60 @@ plt.legend(frameon=False, loc='upper center', bbox_to_anchor=(0.5, -0.05))
|
|||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
plt.show()
|
plt.show()
|
||||||
##
|
##
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# %% name
# Bar chart: number of unique patients for each number of visits.
import matplotlib.pyplot as plt

# Data
data = {
    'Visit': [9, 8, 7, 6, 5, 4, 3, 2, 1],
    'patient_count': [2, 3, 3, 6, 13, 17, 28, 24, 32]
}

# The source lists are in descending visit order; sort ascending so the
# x-axis reads 1..9 from left to right.
pairs = sorted(zip(data['Visit'], data['patient_count']), key=lambda pair: pair[0])
visit_sorted = [visit for visit, _ in pairs]
count_sorted = [count for _, count in pairs]

# Create figure and axis, then draw the chart once with the sorted data
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(visit_sorted, count_sorted, color='darkblue', label='Patients by Visit Count')

# Labels
ax.set_xlabel('Number of Visits', fontsize=12)
ax.set_ylabel('Number of Unique Patients', fontsize=12)
#ax.set_title('Number of Patients by Visit Number', fontsize=14)

# Add legend
ax.legend()

# Improve layout and grid
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.xticks(visit_sorted)  # Ensure all integer visit numbers are shown

# Show the plot
plt.tight_layout()
plt.show()

##
|
||||||
|
|||||||
Reference in New Issue
Block a user