4542 lines
145 KiB
Python
4542 lines
145 KiB
Python
|
||
# %% API call1
|
||
#import time
|
||
#import json
|
||
#import os
|
||
#from datetime import datetime
|
||
#import pandas as pd
|
||
#from openai import OpenAI
|
||
#from dotenv import load_dotenv
|
||
#
|
||
## Load environment variables
|
||
#load_dotenv()
|
||
#
|
||
## === CONFIGURATION ===
|
||
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||
#MODEL_NAME = "GPT-OSS-120B"
|
||
#HEALTH_URL = f"{OPENAI_BASE_URL}/health" # Placeholder - actual health check would need to be implemented
|
||
#CHAT_URL = f"{OPENAI_BASE_URL}/chat/completions"
|
||
#
|
||
## File paths
|
||
#INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||
#EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
|
||
##GRAMMAR_FILE = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/just_edss_schema.gbnf"
|
||
#
|
||
## Initialize OpenAI client
|
||
#client = OpenAI(
|
||
# api_key=OPENAI_API_KEY,
|
||
# base_url=OPENAI_BASE_URL
|
||
#)
|
||
#
|
||
## Read EDSS instructions from file
|
||
#with open(EDSS_INSTRUCTIONS_PATH, 'r') as f:
|
||
# EDSS_INSTRUCTIONS = f.read().strip()
|
||
## === RUN INFERENCE 2 ===
|
||
#def run_inference(patient_text):
|
||
# prompt = f'''
|
||
# Du bist ein medizinischer Assistent, der spezialisiert darauf ist, EDSS-Scores (Expanded Disability Status Scale) aus klinischen Berichten zu extrahieren.
|
||
#### Regeln für die Ausgabe:
|
||
#1. **Reason**: Erstelle eine prägnante Zusammenfassung (max. 400 Zeichen) der Befunde auf **DEUTSCH**, die zur Einstufung führen.
|
||
#2. **klassifizierbar**:
|
||
# - Setze dies auf **true**, wenn ein EDSS-Wert identifiziert, berechnet oder basierend auf den klinischen Hinweisen plausibel geschätzt werden kann.
|
||
# - Setze dies auf **false**, NUR wenn die Daten absolut unzureichend oder so widersprüchlich sind, dass keinerlei Einstufung möglich ist.
|
||
#3. **EDSS**:
|
||
# - Dieses Feld ist **VERPFLICHTEND**, wenn "klassifizierbar" auf true steht.
|
||
# - Es muss eine Zahl zwischen 0.0 und 10.0 sein.
|
||
# - Versuche stets, den EDSS-Wert so präzise wie möglich zu bestimmen, auch wenn die Datenlage dünn ist (nutze verfügbare Informationen zu Gehstrecke und Funktionssystemen).
|
||
# - Dieses Feld **DARF NICHT ERSCHEINEN**, wenn "klassifizierbar" auf false steht.
|
||
#
|
||
#### Einschränkungen:
|
||
#- Erfinde keine Fakten, aber nutze klinische Herleitungen aus dem Bericht, um den EDSS zu bestimmen.
|
||
#- Priorisiere die Vergabe eines EDSS-Wertes gegenüber der Markierung als nicht klassifizierbar.
|
||
#- Halte dich strikt an die JSON-Struktur.
|
||
#
|
||
#EDSS-Bewertungsrichtlinien:
|
||
#{EDSS_INSTRUCTIONS}
|
||
#
|
||
#Patientenbericht:
|
||
#{patient_text}
|
||
#'''
|
||
# start_time = time.time()
|
||
#
|
||
# try:
|
||
# # Make API call using OpenAI client
|
||
# response = client.chat.completions.create(
|
||
# messages=[
|
||
# {
|
||
# "role": "system",
|
||
# "content": "You extract EDSS scores. You prioritize providing a score even if data is partial, by using clinical inference."
|
||
# },
|
||
# {
|
||
# "role": "user",
|
||
# "content": prompt
|
||
# }
|
||
# ],
|
||
# model=MODEL_NAME,
|
||
# max_tokens=2048,
|
||
# temperature=0.0,
|
||
# response_format={"type": "json_object"}
|
||
# )
|
||
#
|
||
# # Extract content from response
|
||
# content = response.choices[0].message.content
|
||
#
|
||
# # Parse the JSON response
|
||
# parsed = json.loads(content)
|
||
#
|
||
# inference_time = time.time() - start_time
|
||
#
|
||
# return {
|
||
# "success": True,
|
||
# "result": parsed,
|
||
# "inference_time_sec": inference_time
|
||
# }
|
||
#
|
||
# except Exception as e:
|
||
# print(f"Inference error: {e}")
|
||
# return {
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "inference_time_sec": -1
|
||
# }
|
||
## === BUILD PATIENT TEXT ===
|
||
#def build_patient_text(row):
|
||
# return (
|
||
# str(row["T_Zusammenfassung"]) + "\n" +
|
||
# str(row["Diagnosen"]) + "\n" +
|
||
# str(row["T_KlinBef"]) + "\n" +
|
||
# str(row["T_Befunde"]) + "\n"
|
||
# )
|
||
#
|
||
#if __name__ == "__main__":
|
||
# # Read CSV file ONLY inside main block
|
||
# df = pd.read_csv(INPUT_CSV, sep=';')
|
||
# results = []
|
||
#
|
||
# # Process each row
|
||
# for idx, row in df.iterrows():
|
||
# print(f"Processing row {idx + 1}/{len(df)}")
|
||
# try:
|
||
# patient_text = build_patient_text(row)
|
||
# result = run_inference(patient_text)
|
||
#
|
||
# # Add unique_id and MedDatum to result for tracking
|
||
# result["unique_id"] = row.get("unique_id", f"row_{idx}")
|
||
# result["MedDatum"] = row.get("MedDatum", None)
|
||
#
|
||
# results.append(result)
|
||
# print(json.dumps(result, indent=2))
|
||
# except Exception as e:
|
||
# print(f"Error processing row {idx}: {e}")
|
||
# results.append({
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "unique_id": row.get("unique_id", f"row_{idx}"),
|
||
# "MedDatum": row.get("MedDatum", None)
|
||
# })
|
||
#
|
||
# # Save results to a JSON file
|
||
# output_json = INPUT_CSV.replace(".csv", "_results_Nisch.json")
|
||
# with open(output_json, 'w') as f:
|
||
# json.dump(results, f, indent=2)
|
||
# print(f"Results saved to {output_json}")
|
||
##
|
||
|
||
|
||
|
||
# %% API call1 - Enhanced with certainty scoring
|
||
#import time
|
||
#import json
|
||
#import os
|
||
#from datetime import datetime
|
||
#import pandas as pd
|
||
#from openai import OpenAI
|
||
#from dotenv import load_dotenv
|
||
#
|
||
## Load environment variables
|
||
#load_dotenv()
|
||
#
|
||
## === CONFIGURATION ===
|
||
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||
#MODEL_NAME = "GPT-OSS-120B"
|
||
#
|
||
## File paths
|
||
#INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/Test.csv"
|
||
#EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
|
||
#
|
||
## Initialize OpenAI client
|
||
#client = OpenAI(
|
||
# api_key=OPENAI_API_KEY,
|
||
# base_url=OPENAI_BASE_URL
|
||
#)
|
||
#
|
||
## Read EDSS instructions from file
|
||
#with open(EDSS_INSTRUCTIONS_PATH, 'r') as f:
|
||
# EDSS_INSTRUCTIONS = f.read().strip()
|
||
#
|
||
## === PROMPT WITH CERTAINTY REQUEST ===
|
||
#def build_prompt(patient_text):
|
||
# return f'''Du bist ein medizinischer Assistent, der spezialisiert darauf ist, EDSS-Scores (Expanded Disability Status Scale), alle Unterkategorien und die Bewertungssicherheit aus klinischen Berichten zu extrahieren.
|
||
#
|
||
#### Deine Aufgabe:
|
||
#1. Analysiere den Patientenbericht und extrahiere:
|
||
# - Den Gesamt-EDSS-Score (0.0–10.0)
|
||
# - Alle 8 EDSS-Unterkategorien (mit jeweils eigener Maximalpunktzahl)
|
||
#2. Schätze für jede Entscheidung die Sicherheit als Ganzzahl von 0–100 % ein.
|
||
#
|
||
#### Struktur der JSON-Ausgabe (VERPFLICHTEND):
|
||
#Gib NUR gültiges JSON zurück — kein Markdown, kein Text davor/dahinter.
|
||
#
|
||
#{{
|
||
# "reason": "Kernaussage zur EDSS-Begründung (max. 400 Zeichen, auf Deutsch).",
|
||
# "klassifizierbar": true/false,
|
||
# "EDSS": null ODER Zahl zwischen 0.0 und 10.0 (nur wenn klassifizierbar=true)",
|
||
# "certainty_percent": 0 ODER Zahl zwischen 0 und 100 (Ganzzahl)",
|
||
# "subcategories": {{
|
||
# "VISUAL_OPTIC_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "BRAINSTEM_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "PYRAMIDAL_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "CEREBELLAR_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "SENSORY_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "BOWEL_AND_BLADDER_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "CEREBRAL_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "AMBULATION": null ODER Zahl zwischen 0.0 und 10.0
|
||
# }}
|
||
#}}
|
||
#
|
||
#### Regeln:
|
||
#- **reason**: Kurze, prägnante Begründung (auf Deutsch, max. 400 Zeichen), warum du den EDSS-Wert und die Unterkategorien so bewertest.
|
||
#- **klassifizierbar**:
|
||
# - `true`, wenn EDSS und mindestens die wichtigsten Unterkategorien *eindeutig ableitbar* oder *plausibel inferierbar* sind.
|
||
# - `false`, **nur**, wenn keine relevanten Daten vorliegen, oder diese so widersprüchlich/inkonsistent sind, dass keine vernünftige Einschätzung möglich ist.
|
||
#- **EDSS**:
|
||
# - **VERPFLICHTEND**, wenn `klassifizierbar=true`.
|
||
# - Zahl zwischen 0.0 und 10.0 (z.B. 3.0, 5.5). Darf **nicht** erscheinen, wenn `klassifizierbar=false`.
|
||
#- **certainty_percent**:
|
||
# - **Immer present** — Ganzzahl (0–100), basierend auf:
|
||
# - Klarheit und Vollständigkeit der Berichtsangaben,
|
||
# - Stichhaltigkeit der Schlussfolgerung (inkl. Inferenz),
|
||
# - Konsistenz zwischen den Unterkategorien.
|
||
#- **subcategories**:
|
||
# - **Immer present** — **alle 8 Unterkategorien** müssen enthalten sein.
|
||
# - Jeder Wert ist entweder:
|
||
# - `null` (wenn keine ausreichende Information vorliegt), **oder**
|
||
# - eine Zahl ≤ jeweiliger Obergrenze (z.B. Ambulation ≤ 10.0).
|
||
# - Wenn die Unterkategorie plausibel inferiert werden kann (auch indirekt), gib einen sinnvollen Wert ab.
|
||
# - Beispiel: Wenn „Gang mit Krückstock auf ebenem Boden bis 200 m“ steht, setze `AMBULATION: 5.5`.
|
||
#
|
||
#### EDSS-Bewertungsrichtlinien:
|
||
#{EDSS_INSTRUCTIONS}
|
||
#
|
||
#Patientenbericht:
|
||
#{patient_text}
|
||
#'''
|
||
#
|
||
## === INFERENCE FUNCTION ===
|
||
#def run_inference(patient_text):
|
||
# prompt = build_prompt(patient_text)
|
||
#
|
||
# start_time = time.time()
|
||
#
|
||
# try:
|
||
# response = client.chat.completions.create(
|
||
# messages=[
|
||
# {"role": "system", "content": "Du gibst EXKLUSIV gültiges JSON zurück — keine weiteren Erklärungen."}
|
||
# ] + [
|
||
# {"role": "user", "content": prompt}
|
||
# ],
|
||
# model=MODEL_NAME,
|
||
# max_tokens=2048,
|
||
# temperature=0.1, # Slightly higher for more natural certainty estimation (still low for reliability)
|
||
# response_format={"type": "json_object"}
|
||
# )
|
||
#
|
||
# content = response.choices[0].message.content
|
||
#
|
||
# # Parse and validate JSON
|
||
# try:
|
||
# parsed = json.loads(content)
|
||
# except json.JSONDecodeError as e:
|
||
# print(f"⚠️ JSON parsing failed: {e}")
|
||
# print("Raw response:", content[:500])
|
||
# raise ValueError("Model did not return valid JSON")
|
||
#
|
||
# # Enforce required keys
|
||
# if "certainty_percent" not in parsed:
|
||
# print("⚠️ Missing 'certainty_percent' in output! Force-adding fallback.")
|
||
# parsed["certainty_percent"] = 0 # fallback
|
||
# elif not isinstance(parsed["certainty_percent"], (int, float)):
|
||
# parsed["certainty_percent"] = int(parsed["certainty_percent"])
|
||
#
|
||
# # Clamp certainty to [0, 100]
|
||
# pct = parsed["certainty_percent"]
|
||
# parsed["certainty_percent"] =max(0, min(100, int(pct)))
|
||
#
|
||
# # Enforce EDSS rules: if not classifiable → remove EDSS
|
||
# if not parsed.get("klassifizierbar", False):
|
||
# if "EDSS" in parsed:
|
||
# del parsed["EDSS"] # per spec, must not appear if not classifiable
|
||
# else:
|
||
# if "EDSS" not in parsed:
|
||
# print("⚠️ 'klassifizierbar' is true but EDSS missing — adding fallback.")
|
||
# parsed["EDSS"] = 7.0 # last-resort fallback
|
||
#
|
||
# inference_time = time.time() - start_time
|
||
#
|
||
# return {
|
||
# "success": True,
|
||
# "result": parsed,
|
||
# "inference_time_sec": inference_time
|
||
# }
|
||
#
|
||
# except Exception as e:
|
||
# print(f"❌ Inference error: {e}")
|
||
# return {
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "inference_time_sec": -1,
|
||
# "result": None # no structured output
|
||
# }
|
||
#
|
||
## === BUILD PATIENT TEXT ===
|
||
#def build_patient_text(row):
|
||
# return (
|
||
# str(row.get("T_Zusammenfassung", "")) + "\n" +
|
||
# str(row.get("Diagnosen", "")) + "\n" +
|
||
# str(row.get("T_KlinBef", "")) + "\n" +
|
||
# str(row.get("T_Befunde", ""))
|
||
# )
|
||
#
|
||
#if __name__ == "__main__":
|
||
# # Load data
|
||
# df = pd.read_csv(INPUT_CSV, sep=';')
|
||
# results = []
|
||
#
|
||
# # Optional: limit for testing
|
||
# # df = df.head(3)
|
||
#
|
||
# print(f"Processing {len(df)} rows...")
|
||
# for idx, row in df.iterrows():
|
||
# print(f"\n— Row {idx + 1}/{len(df)} —")
|
||
# try:
|
||
# patient_text = build_patient_text(row)
|
||
# result = run_inference(patient_text)
|
||
#
|
||
# # Attach metadata
|
||
# result["unique_id"] = row.get("unique_id", f"row_{idx}")
|
||
# result["MedDatum"] = row.get("MedDatum", None)
|
||
#
|
||
# results.append(result)
|
||
#
|
||
# # Print summary
|
||
# if result["success"]:
|
||
# res = result["result"]
|
||
# edss = res.get("EDSS", "N/A") if res.get("klassifizierbar") else "N/A"
|
||
# print(f"✅ Result → EDSS={edss}, certainty={res.get('certainty_percent', 'N/A')}%")
|
||
# print(f" Reason: {res.get('reason', 'N/A')[:100]}…")
|
||
# else:
|
||
# print(f"❌ Failed: {result.get('error', 'Unknown error')[:100]}")
|
||
#
|
||
# except Exception as e:
|
||
# print(f"⚠️ Error processing row {idx}: {e}")
|
||
# results.append({
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "unique_id": row.get("unique_id", f"row_{idx}"),
|
||
# "MedDatum": row.get("MedDatum", None),
|
||
# "result": None
|
||
# })
|
||
#
|
||
# # Save results
|
||
# output_json = INPUT_CSV.replace(".csv", "_results_Nisch_certainty.json")
|
||
# with open(output_json, 'w', encoding='utf-8') as f:
|
||
# json.dump(results, f, indent=2, ensure_ascii=False)
|
||
# print(f"\n✅ Saved results to: {output_json}")
|
||
#
|
||
##
|
||
|
||
|
||
# %% API call - Multi-iteration EDSS + certainty extraction
|
||
#
|
||
#import time
|
||
#import json
|
||
#import os
|
||
#from datetime import datetime
|
||
#import pandas as pd
|
||
#from openai import OpenAI
|
||
#from dotenv import load_dotenv
|
||
#
|
||
## Load environment variables
|
||
#load_dotenv()
|
||
#
|
||
## === CONFIGURATION ===
|
||
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||
#MODEL_NAME = "GPT-OSS-120B"
|
||
#
|
||
## File paths
|
||
#INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||
#EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
|
||
#
|
||
## Iteration settings
|
||
#NUM_ITERATIONS = 20
|
||
#STOP_ON_FIRST_ERROR = False # Set to True for debugging
|
||
#
|
||
## Initialize OpenAI client
|
||
#client = OpenAI(
|
||
# api_key=OPENAI_API_KEY,
|
||
# base_url=OPENAI_BASE_URL
|
||
#)
|
||
#
|
||
## Read EDSS instructions from file
|
||
#with open(EDSS_INSTRUCTIONS_PATH, 'r') as f:
|
||
# EDSS_INSTRUCTIONS = f.read().strip()
|
||
#
|
||
## === PROMPT (unchanged from before) ===
|
||
#def build_prompt(patient_text):
|
||
# return f'''Du bist ein medizinischer Assistent, der spezialisiert darauf ist, EDSS-Scores (Expanded Disability Status Scale), alle Unterkategorien und die Bewertungssicherheit aus klinischen Berichten zu extrahieren.
|
||
#
|
||
#### Deine Aufgabe:
|
||
#1. Analysiere den Patientenbericht und extrahiere:
|
||
# - Den Gesamt-EDSS-Score (0.0–10.0)
|
||
# - Alle 8 EDSS-Unterkategorien (mit jeweils eigener Maximalpunktzahl)
|
||
#2. Schätze für jede Entscheidung die Sicherheit als Ganzzahl von 0–100 % ein.
|
||
#
|
||
#### Struktur der JSON-Ausgabe (VERPFLICHTEND):
|
||
#Gib NUR gültiges JSON zurück — kein Markdown, kein Text davor/dahinter.
|
||
#
|
||
#{{
|
||
# "reason": "Kernaussage zur EDSS-Begründung (max. 400 Zeichen, auf Deutsch).",
|
||
# "klassifizierbar": true/false,
|
||
# "EDSS": null ODER Zahl zwischen 0.0 und 10.0 (nur wenn klassifizierbar=true)",
|
||
# "certainty_percent": 0 ODER Zahl zwischen 0 und 100 (Ganzzahl)",
|
||
# "subcategories": {{
|
||
# "VISUAL_OPTIC_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "BRAINSTEM_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "PYRAMIDAL_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "CEREBELLAR_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "SENSORY_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "BOWEL_AND_BLADDER_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "CEREBRAL_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "AMBULATION": null ODER Zahl zwischen 0.0 und 10.0
|
||
# }}
|
||
#}}
|
||
#
|
||
#### Regeln:
|
||
#- **reason**: Kurze, prägnante Begründung (auf Deutsch, max. 400 Zeichen), warum du den EDSS-Wert und die Unterkategorien so bewertest.
|
||
#- **klassifizierbar**:
|
||
# - `true`, wenn EDSS und mindestens die wichtigsten Unterkategorien *eindeutig ableitbar* oder *plausibel inferierbar* sind.
|
||
# - `false`, **nur**, wenn keine relevanten Daten vorliegen, oder diese so widersprüchlich/inkonsistent sind, dass keine vernünftige Einschätzung möglich ist.
|
||
#- **EDSS**:
|
||
# - **VERPFLICHTEND**, wenn `klassifizierbar=true`.
|
||
# - Zahl zwischen 0.0 und 10.0 (z.B. 3.0, 5.5). Darf **nicht** erscheinen, wenn `klassifizierbar=false`.
|
||
#- **certainty_percent**:
|
||
# - **Immer present** — Ganzzahl (0–100), basierend auf:
|
||
# - Klarheit und Vollständigkeit der Berichtsangaben,
|
||
# - Stichhaltigkeit der Schlussfolgerung (inkl. Inferenz),
|
||
# - Konsistenz zwischen den Unterkategorien.
|
||
#- **subcategories**:
|
||
# - **Immer present** — **alle 8 Unterkategorien** müssen enthalten sein.
|
||
# - Jeder Wert ist entweder:
|
||
# - `null` (wenn keine ausreichende Information vorliegt), **oder**
|
||
# - eine Zahl ≤ jeweiliger Obergrenze (z.B. Ambulation ≤ 10.0).
|
||
# - Wenn die Unterkategorie plausibel inferiert werden kann (auch indirekt), gib einen sinnvollen Wert ab.
|
||
# - Beispiel: Wenn „Gang mit Krückstock auf ebenem Boden bis 200 m“ steht, setze `AMBULATION: 5.5`.
|
||
#
|
||
#### EDSS-Bewertungsrichtlinien:
|
||
#{EDSS_INSTRUCTIONS}
|
||
#
|
||
#Patientenbericht:
|
||
#{patient_text}
|
||
#'''
|
||
#
|
||
## === INFERENCE FUNCTION (unchanged) ===
|
||
#def run_inference(patient_text):
|
||
# prompt = build_prompt(patient_text)
|
||
#
|
||
# start_time = time.time()
|
||
#
|
||
# try:
|
||
# response = client.chat.completions.create(
|
||
# messages=[
|
||
# {"role": "system", "content": "Du gibst EXKLUSIV gültiges JSON zurück — keine weiteren Erklärungen."}
|
||
# ] + [
|
||
# {"role": "user", "content": prompt}
|
||
# ],
|
||
# model=MODEL_NAME,
|
||
# max_tokens=2048,
|
||
# temperature=0.1,
|
||
# response_format={"type": "json_object"}
|
||
# )
|
||
#
|
||
# content = response.choices[0].message.content
|
||
#
|
||
# # Parse and validate JSON
|
||
# try:
|
||
# parsed = json.loads(content)
|
||
# except json.JSONDecodeError as e:
|
||
# print(f"⚠️ JSON parsing failed: {e}")
|
||
# print("Raw response:", content[:500])
|
||
# raise ValueError("Model did not return valid JSON")
|
||
#
|
||
# # Enforce required keys
|
||
# if "certainty_percent" not in parsed:
|
||
# print("⚠️ Missing 'certainty_percent' in output! Force-adding fallback.")
|
||
# parsed["certainty_percent"] = 0
|
||
# elif not isinstance(parsed["certainty_percent"], (int, float)):
|
||
# parsed["certainty_percent"] = int(parsed["certainty_percent"])
|
||
#
|
||
# # Clamp certainty to [0, 100]
|
||
# pct = parsed["certainty_percent"]
|
||
# parsed["certainty_percent"] = max(0, min(100, int(pct)))
|
||
#
|
||
# # Enforce EDSS rules
|
||
# if not parsed.get("klassifizierbar", False):
|
||
# if "EDSS" in parsed:
|
||
# del parsed["EDSS"]
|
||
# else:
|
||
# if "EDSS" not in parsed:
|
||
# print("⚠️ 'klassifizierbar' is true but EDSS missing — adding fallback.")
|
||
# parsed["EDSS"] = 7.0
|
||
#
|
||
# inference_time = time.time() - start_time
|
||
#
|
||
# return {
|
||
# "success": True,
|
||
# "result": parsed,
|
||
# "inference_time_sec": inference_time
|
||
# }
|
||
#
|
||
# except Exception as e:
|
||
# print(f"❌ Inference error: {e}")
|
||
# return {
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "inference_time_sec": -1,
|
||
# "result": None
|
||
# }
|
||
#
|
||
## === BUILD PATIENT TEXT ===
|
||
#def build_patient_text(row):
|
||
# return (
|
||
# str(row.get("T_Zusammenfassung", "")) + "\n" +
|
||
# str(row.get("Diagnosen", "")) + "\n" +
|
||
# str(row.get("T_KlinBef", "")) + "\n" +
|
||
# str(row.get("T_Befunde", ""))
|
||
# )
|
||
#
|
||
## === MAIN LOOP (NEW: MULTI-ITERATION) ===
|
||
#if __name__ == "__main__":
|
||
# # Load data ONCE (to avoid repeated I/O overhead)
|
||
# df = pd.read_csv(INPUT_CSV, sep=';')
|
||
# total_rows = len(df)
|
||
# print(f"Loaded {total_rows} patient records.")
|
||
#
|
||
# for iteration in range(1, NUM_ITERATIONS + 1):
|
||
# print(f"\n{'='*60}")
|
||
# print(f"🔄 ITERATION {iteration}/{NUM_ITERATIONS}")
|
||
# print(f"{'='*60}")
|
||
#
|
||
# iteration_results = []
|
||
# start_iter = time.time()
|
||
#
|
||
# for idx, row in df.iterrows():
|
||
# print(f"\rRow {idx+1}/{total_rows} | Iter {iteration}", end='', flush=True)
|
||
# try:
|
||
# patient_text = build_patient_text(row)
|
||
# result = run_inference(patient_text)
|
||
#
|
||
# # Attach metadata
|
||
# if result["success"]:
|
||
# res = result["result"].copy() # avoid mutation
|
||
# res["iteration"] = iteration
|
||
# res["unique_id"] = row.get("unique_id", f"row_{idx}")
|
||
# res["MedDatum"] = row.get("MedDatum", None)
|
||
# result["result"] = res
|
||
#
|
||
# else:
|
||
# result["iteration"] = iteration
|
||
# result["unique_id"] = row.get("unique_id", f"row_{idx}")
|
||
# result["MedDatum"] = row.get("MedDatum", None)
|
||
#
|
||
# iteration_results.append(result)
|
||
#
|
||
# if result["success"]:
|
||
# res = result["result"]
|
||
# edss = res.get("EDSS", "N/A") if res.get("klassifizierbar") else "N/A"
|
||
# print(f" ✅ EDSS={edss}, cert={res.get('certainty_percent', '?')}%")
|
||
# else:
|
||
# print(f" ❌ {result.get('error', 'Unknown')}")
|
||
#
|
||
# except Exception as e:
|
||
# print(f"\n⚠️ Row {idx} failed: {e}")
|
||
# iteration_results.append({
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "iteration": iteration,
|
||
# "unique_id": row.get("unique_id", f"row_{idx}"),
|
||
# "MedDatum": row.get("MedDatum", None),
|
||
# "result": None
|
||
# })
|
||
# if STOP_ON_FIRST_ERROR:
|
||
# break
|
||
#
|
||
# # Save per-iteration results
|
||
# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
# output_path = INPUT_CSV.replace(".csv", f"_results_iter_{iteration}_{timestamp}.json")
|
||
# with open(output_path, 'w', encoding='utf-8') as f:
|
||
# json.dump(iteration_results, f, indent=2, ensure_ascii=False)
|
||
# print(f"\n✅ Iteration {iteration} complete. Saved to: {output_path}")
|
||
#
|
||
# elapsed = time.time() - start_iter
|
||
# print(f"⏱️ Iteration {iteration} took {elapsed:.1f}s ({elapsed/total_rows:.1f}s/row)")
|
||
#
|
||
# print(f"\n🎉 All {NUM_ITERATIONS} iterations completed!")
|
||
#
|
||
#
|
||
|
||
##
|
||
|
||
|
||
|
||
# %% API call - Multi-model, multi-iteration EDSS + timing/resource benchmark
|
||
#
|
||
#import time
|
||
#import json
|
||
#import os
|
||
#import re
|
||
#import threading
|
||
#from datetime import datetime
|
||
#from pathlib import Path
|
||
#
|
||
#import pandas as pd
|
||
#from openai import OpenAI
|
||
#from dotenv import load_dotenv
|
||
#
|
||
#try:
|
||
# import psutil
|
||
#except ImportError:
|
||
# psutil = None
|
||
# print("⚠️ psutil is not installed. Resource metrics will be limited.")
|
||
# print("Install with: pip install psutil")
|
||
#
|
||
#
|
||
## =========================
|
||
## CONFIGURATION
|
||
## =========================
|
||
#
|
||
#load_dotenv()
|
||
#
|
||
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||
#
|
||
#MODEL_NAMES = [
|
||
# # "GPT-OSS-120B",
|
||
# "qwen3.6-27b",
|
||
# # "gemma-4-31B-it",
|
||
#]
|
||
#
|
||
#INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||
#EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
|
||
#
|
||
#RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results_edss_benchmark"
|
||
#
|
||
#NUM_ITERATIONS = 20
|
||
#STOP_ON_FIRST_ERROR = False
|
||
#
|
||
#MAX_TOKENS = 2048
|
||
#TEMPERATURE = 0.1
|
||
#
|
||
## Memory sampling interval during one inference call
|
||
#RESOURCE_SAMPLE_INTERVAL_SEC = 0.05
|
||
#
|
||
#
|
||
## =========================
|
||
## CLIENT
|
||
## =========================
|
||
#
|
||
#client = OpenAI(
|
||
# api_key=OPENAI_API_KEY,
|
||
# base_url=OPENAI_BASE_URL
|
||
#)
|
||
#
|
||
#
|
||
## =========================
|
||
## HELPERS
|
||
## =========================
|
||
#
|
||
#def safe_dir_name(name: str) -> str:
|
||
# """
|
||
# Convert model name to a filesystem-safe directory name.
|
||
# """
|
||
# name = str(name).strip()
|
||
# name = re.sub(r"[^\w\-.]+", "_", name)
|
||
# return name[:150]
|
||
#
|
||
#
|
||
#def now_timestamp() -> str:
|
||
# return datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
#
|
||
#
|
||
#def get_process():
|
||
# if psutil is None:
|
||
# return None
|
||
# return psutil.Process(os.getpid())
|
||
#
|
||
#
|
||
#def get_memory_rss_mb(process=None):
|
||
# if psutil is None:
|
||
# return None
|
||
# if process is None:
|
||
# process = get_process()
|
||
# return process.memory_info().rss / (1024 * 1024)
|
||
#
|
||
#
|
||
#def get_cpu_times_sec(process=None):
|
||
# if psutil is None:
|
||
# return None
|
||
# if process is None:
|
||
# process = get_process()
|
||
# cpu_times = process.cpu_times()
|
||
# return cpu_times.user + cpu_times.system
|
||
#
|
||
#
|
||
#class ResourceSampler:
|
||
# """
|
||
# Samples process RSS while an inference call is running.
|
||
# Useful for approximating peak memory usage per request.
|
||
# """
|
||
#
|
||
# def __init__(self, interval_sec=0.05):
|
||
# self.interval_sec = interval_sec
|
||
# self.process = get_process()
|
||
# self.running = False
|
||
# self.thread = None
|
||
# self.samples_mb = []
|
||
#
|
||
# def start(self):
|
||
# if psutil is None:
|
||
# return
|
||
#
|
||
# self.running = True
|
||
# self.samples_mb = []
|
||
# self.thread = threading.Thread(target=self._sample_loop, daemon=True)
|
||
# self.thread.start()
|
||
#
|
||
# def stop(self):
|
||
# if psutil is None:
|
||
# return
|
||
#
|
||
# self.running = False
|
||
# if self.thread is not None:
|
||
# self.thread.join(timeout=1.0)
|
||
#
|
||
# def _sample_loop(self):
|
||
# while self.running:
|
||
# try:
|
||
# rss_mb = get_memory_rss_mb(self.process)
|
||
# self.samples_mb.append(rss_mb)
|
||
# except Exception:
|
||
# pass
|
||
# time.sleep(self.interval_sec)
|
||
#
|
||
# @property
|
||
# def peak_rss_mb(self):
|
||
# if not self.samples_mb:
|
||
# return None
|
||
# return max(self.samples_mb)
|
||
#
|
||
#
|
||
## =========================
|
||
## READ INSTRUCTIONS
|
||
## =========================
|
||
#
|
||
#with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
|
||
# EDSS_INSTRUCTIONS = f.read().strip()
|
||
#
|
||
#
|
||
## =========================
|
||
## PROMPT
|
||
## =========================
|
||
#
|
||
#def build_prompt(patient_text):
|
||
# return f'''Du bist ein medizinischer Assistent, der spezialisiert darauf ist, EDSS-Scores (Expanded Disability Status Scale), alle Unterkategorien und die Bewertungssicherheit aus klinischen Berichten zu extrahieren.
|
||
#
|
||
#### Deine Aufgabe:
|
||
#1. Analysiere den Patientenbericht und extrahiere:
|
||
# - Den Gesamt-EDSS-Score (0.0–10.0)
|
||
# - Alle 8 EDSS-Unterkategorien (mit jeweils eigener Maximalpunktzahl)
|
||
#2. Schätze für jede Entscheidung die Sicherheit als Ganzzahl von 0–100 % ein.
|
||
#
|
||
#### Struktur der JSON-Ausgabe (VERPFLICHTEND):
|
||
#Gib NUR gültiges JSON zurück — kein Markdown, kein Text davor/dahinter.
|
||
#
|
||
#{{
|
||
# "reason": "Kernaussage zur EDSS-Begründung (max. 400 Zeichen, auf Deutsch).",
|
||
# "klassifizierbar": true/false,
|
||
# "EDSS": null ODER Zahl zwischen 0.0 und 10.0 (nur wenn klassifizierbar=true)",
|
||
# "certainty_percent": 0 ODER Zahl zwischen 0 und 100 (Ganzzahl)",
|
||
# "subcategories": {{
|
||
# "VISUAL_OPTIC_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "BRAINSTEM_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "PYRAMIDAL_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "CEREBELLAR_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "SENSORY_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "BOWEL_AND_BLADDER_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "CEREBRAL_FUNCTIONS": null ODER Zahl zwischen 0.0 und 6.0,
|
||
# "AMBULATION": null ODER Zahl zwischen 0.0 und 10.0
|
||
# }}
|
||
#}}
|
||
#
|
||
#### Regeln:
|
||
#- **reason**: Kurze, prägnante Begründung (auf Deutsch, max. 400 Zeichen), warum du den EDSS-Wert und die Unterkategorien so bewertest.
|
||
#- **klassifizierbar**:
|
||
# - `true`, wenn EDSS und mindestens die wichtigsten Unterkategorien *eindeutig ableitbar* oder *plausibel inferierbar* sind.
|
||
# - `false`, **nur**, wenn keine relevanten Daten vorliegen, oder diese so widersprüchlich/inkonsistent sind, dass keine vernünftige Einschätzung möglich ist.
|
||
#- **EDSS**:
|
||
# - **VERPFLICHTEND**, wenn `klassifizierbar=true`.
|
||
# - Zahl zwischen 0.0 und 10.0 (z.B. 3.0, 5.5). Darf **nicht** erscheinen, wenn `klassifizierbar=false`.
|
||
#- **certainty_percent**:
|
||
# - **Immer present** — Ganzzahl (0–100), basierend auf:
|
||
# - Klarheit und Vollständigkeit der Berichtsangaben,
|
||
# - Stichhaltigkeit der Schlussfolgerung (inkl. Inferenz),
|
||
# - Konsistenz zwischen den Unterkategorien.
|
||
#- **subcategories**:
|
||
# - **Immer present** — **alle 8 Unterkategorien** müssen enthalten sein.
|
||
# - Jeder Wert ist entweder:
|
||
# - `null` (wenn keine ausreichende Information vorliegt), **oder**
|
||
# - eine Zahl ≤ jeweiliger Obergrenze (z.B. Ambulation ≤ 10.0).
|
||
# - Wenn die Unterkategorie plausibel inferiert werden kann (auch indirekt), gib einen sinnvollen Wert ab.
|
||
# - Beispiel: Wenn „Gang mit Krückstock auf ebenem Boden bis 200 m“ steht, setze `AMBULATION: 5.5`.
|
||
#
|
||
#### EDSS-Bewertungsrichtlinien:
|
||
#{EDSS_INSTRUCTIONS}
|
||
#
|
||
#Patientenbericht:
|
||
#{patient_text}
|
||
#'''
|
||
#
|
||
#
|
||
## =========================
|
||
## VALIDATION / NORMALIZATION
|
||
## =========================
|
||
#
|
||
#def normalize_model_output(parsed):
|
||
# """
|
||
# Keeps your existing validation behavior, with a few extra safety checks.
|
||
# """
|
||
#
|
||
# if not isinstance(parsed, dict):
|
||
# raise ValueError("Parsed model output is not a JSON object")
|
||
#
|
||
# if "certainty_percent" not in parsed:
|
||
# print("⚠️ Missing 'certainty_percent' in output. Force-adding fallback.")
|
||
# parsed["certainty_percent"] = 0
|
||
# elif not isinstance(parsed["certainty_percent"], (int, float)):
|
||
# parsed["certainty_percent"] = int(parsed["certainty_percent"])
|
||
#
|
||
# parsed["certainty_percent"] = max(0, min(100, int(parsed["certainty_percent"])))
|
||
#
|
||
# if "klassifizierbar" not in parsed:
|
||
# parsed["klassifizierbar"] = False
|
||
#
|
||
# if not parsed.get("klassifizierbar", False):
|
||
# parsed.pop("EDSS", None)
|
||
# else:
|
||
# if "EDSS" not in parsed:
|
||
# print("⚠️ 'klassifizierbar' is true but EDSS missing — adding fallback.")
|
||
# parsed["EDSS"] = 7.0
|
||
#
|
||
# required_subcategories = {
|
||
# "VISUAL_OPTIC_FUNCTIONS": 6.0,
|
||
# "BRAINSTEM_FUNCTIONS": 6.0,
|
||
# "PYRAMIDAL_FUNCTIONS": 6.0,
|
||
# "CEREBELLAR_FUNCTIONS": 6.0,
|
||
# "SENSORY_FUNCTIONS": 6.0,
|
||
# "BOWEL_AND_BLADDER_FUNCTIONS": 6.0,
|
||
# "CEREBRAL_FUNCTIONS": 6.0,
|
||
# "AMBULATION": 10.0,
|
||
# }
|
||
#
|
||
# if "subcategories" not in parsed or not isinstance(parsed["subcategories"], dict):
|
||
# parsed["subcategories"] = {}
|
||
#
|
||
# for key in required_subcategories:
|
||
# if key not in parsed["subcategories"]:
|
||
# parsed["subcategories"][key] = None
|
||
#
|
||
# return parsed
|
||
#
|
||
#
|
||
## =========================
|
||
## INFERENCE FUNCTION
|
||
## =========================
|
||
#
|
||
#def run_inference(patient_text, model_name):
|
||
# prompt = build_prompt(patient_text)
|
||
#
|
||
# process = get_process()
|
||
# sampler = ResourceSampler(interval_sec=RESOURCE_SAMPLE_INTERVAL_SEC)
|
||
#
|
||
# wall_start = time.perf_counter()
|
||
# cpu_start = get_cpu_times_sec(process)
|
||
# rss_start_mb = get_memory_rss_mb(process)
|
||
#
|
||
# sampler.start()
|
||
#
|
||
# try:
|
||
# response = client.chat.completions.create(
|
||
# messages=[
|
||
# {
|
||
# "role": "system",
|
||
# "content": "Du gibst EXKLUSIV gültiges JSON zurück — keine weiteren Erklärungen."
|
||
# },
|
||
# {
|
||
# "role": "user",
|
||
# "content": prompt
|
||
# }
|
||
# ],
|
||
# model=model_name,
|
||
# max_tokens=MAX_TOKENS,
|
||
# temperature=TEMPERATURE,
|
||
# response_format={"type": "json_object"}
|
||
# )
|
||
#
|
||
# content = response.choices[0].message.content
|
||
#
|
||
# try:
|
||
# parsed = json.loads(content)
|
||
# except json.JSONDecodeError as e:
|
||
# print(f"⚠️ JSON parsing failed: {e}")
|
||
# print("Raw response:", content[:500])
|
||
# raise ValueError("Model did not return valid JSON")
|
||
#
|
||
# parsed = normalize_model_output(parsed)
|
||
#
|
||
# usage = getattr(response, "usage", None)
|
||
#
|
||
# if usage is not None:
|
||
# prompt_tokens = getattr(usage, "prompt_tokens", None)
|
||
# completion_tokens = getattr(usage, "completion_tokens", None)
|
||
# total_tokens = getattr(usage, "total_tokens", None)
|
||
# else:
|
||
# prompt_tokens = None
|
||
# completion_tokens = None
|
||
# total_tokens = None
|
||
#
|
||
# success = True
|
||
# error = None
|
||
# result = parsed
|
||
#
|
||
# except Exception as e:
|
||
# print(f"❌ Inference error: {e}")
|
||
#
|
||
# success = False
|
||
# error = str(e)
|
||
# result = None
|
||
# prompt_tokens = None
|
||
# completion_tokens = None
|
||
# total_tokens = None
|
||
#
|
||
# finally:
|
||
# sampler.stop()
|
||
#
|
||
# wall_end = time.perf_counter()
|
||
# cpu_end = get_cpu_times_sec(process)
|
||
# rss_end_mb = get_memory_rss_mb(process)
|
||
#
|
||
# wall_time_sec = wall_end - wall_start
|
||
#
|
||
# if cpu_start is not None and cpu_end is not None:
|
||
# process_cpu_time_sec = cpu_end - cpu_start
|
||
# else:
|
||
# process_cpu_time_sec = None
|
||
#
|
||
# if rss_start_mb is not None and rss_end_mb is not None:
|
||
# rss_delta_mb = rss_end_mb - rss_start_mb
|
||
# else:
|
||
# rss_delta_mb = None
|
||
#
|
||
# return {
|
||
# "success": success,
|
||
# "error": error,
|
||
# "result": result,
|
||
#
|
||
# "model": model_name,
|
||
#
|
||
# # Main inference timing
|
||
# "inference_time_sec": wall_time_sec,
|
||
#
|
||
# # Resource metrics
|
||
# "process_cpu_time_sec": process_cpu_time_sec,
|
||
# "rss_before_mb": rss_start_mb,
|
||
# "rss_after_mb": rss_end_mb,
|
||
# "rss_delta_mb": rss_delta_mb,
|
||
# "peak_rss_mb": sampler.peak_rss_mb,
|
||
#
|
||
# # Token metrics, when available
|
||
# "prompt_tokens": prompt_tokens,
|
||
# "completion_tokens": completion_tokens,
|
||
# "total_tokens": total_tokens,
|
||
# }
|
||
#
|
||
#
|
||
## =========================
|
||
## BUILD PATIENT TEXT
|
||
## =========================
|
||
#
|
||
#def build_patient_text(row):
|
||
# return (
|
||
# str(row.get("T_Zusammenfassung", "")) + "\n" +
|
||
# str(row.get("Diagnosen", "")) + "\n" +
|
||
# str(row.get("T_KlinBef", "")) + "\n" +
|
||
# str(row.get("T_Befunde", ""))
|
||
# )
|
||
#
|
||
#
|
||
## =========================
|
||
## FLATTEN RESULTS FOR CSV
|
||
## =========================
|
||
#
|
||
#def flatten_result(record):
|
||
# """
|
||
# Converts one result record to a flat row for CSV export.
|
||
# """
|
||
#
|
||
# flat = {
|
||
# "model": record.get("model"),
|
||
# "iteration": record.get("iteration"),
|
||
# "row_index": record.get("row_index"),
|
||
# "unique_id": record.get("unique_id"),
|
||
# "MedDatum": record.get("MedDatum"),
|
||
#
|
||
# "success": record.get("success"),
|
||
# "error": record.get("error"),
|
||
#
|
||
# "inference_time_sec": record.get("inference_time_sec"),
|
||
# "process_cpu_time_sec": record.get("process_cpu_time_sec"),
|
||
# "rss_before_mb": record.get("rss_before_mb"),
|
||
# "rss_after_mb": record.get("rss_after_mb"),
|
||
# "rss_delta_mb": record.get("rss_delta_mb"),
|
||
# "peak_rss_mb": record.get("peak_rss_mb"),
|
||
#
|
||
# "prompt_tokens": record.get("prompt_tokens"),
|
||
# "completion_tokens": record.get("completion_tokens"),
|
||
# "total_tokens": record.get("total_tokens"),
|
||
# }
|
||
#
|
||
# result = record.get("result")
|
||
#
|
||
# if isinstance(result, dict):
|
||
# flat["reason"] = result.get("reason")
|
||
# flat["klassifizierbar"] = result.get("klassifizierbar")
|
||
# flat["EDSS"] = result.get("EDSS")
|
||
# flat["certainty_percent"] = result.get("certainty_percent")
|
||
#
|
||
# subcats = result.get("subcategories", {})
|
||
# if isinstance(subcats, dict):
|
||
# for key, value in subcats.items():
|
||
# flat[f"subcat_{key}"] = value
|
||
#
|
||
# return flat
|
||
#
|
||
#
|
||
#def summarize_records(records):
|
||
# """
|
||
# Creates summary statistics for one model over all iterations.
|
||
# """
|
||
#
|
||
# df = pd.DataFrame([flatten_result(r) for r in records])
|
||
#
|
||
# if df.empty:
|
||
# return pd.DataFrame()
|
||
#
|
||
# summary = {
|
||
# "model": df["model"].iloc[0] if "model" in df.columns else None,
|
||
# "n_records": len(df),
|
||
# "n_success": int(df["success"].sum()) if "success" in df.columns else None,
|
||
# "n_failed": int((~df["success"]).sum()) if "success" in df.columns else None,
|
||
# "success_rate": float(df["success"].mean()) if "success" in df.columns else None,
|
||
# }
|
||
#
|
||
# numeric_cols = [
|
||
# "inference_time_sec",
|
||
# "process_cpu_time_sec",
|
||
# "rss_delta_mb",
|
||
# "peak_rss_mb",
|
||
# "prompt_tokens",
|
||
# "completion_tokens",
|
||
# "total_tokens",
|
||
# "certainty_percent",
|
||
# "EDSS",
|
||
# ]
|
||
#
|
||
# for col in numeric_cols:
|
||
# if col in df.columns:
|
||
# values = pd.to_numeric(df[col], errors="coerce")
|
||
# summary[f"{col}_mean"] = values.mean()
|
||
# summary[f"{col}_median"] = values.median()
|
||
# summary[f"{col}_std"] = values.std()
|
||
# summary[f"{col}_min"] = values.min()
|
||
# summary[f"{col}_max"] = values.max()
|
||
#
|
||
# return pd.DataFrame([summary])
|
||
#
|
||
#
|
||
## =========================
|
||
## MAIN LOOP
|
||
## =========================
|
||
#
|
||
#if __name__ == "__main__":
|
||
#
|
||
# run_timestamp = now_timestamp()
|
||
#
|
||
# results_root = Path(RESULTS_ROOT)
|
||
# results_root.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# run_root = results_root / f"run_{run_timestamp}"
|
||
# run_root.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# print(f"Results root: {run_root}")
|
||
#
|
||
# df = pd.read_csv(INPUT_CSV, sep=";")
|
||
# total_rows = len(df)
|
||
#
|
||
# print(f"Loaded {total_rows} patient records.")
|
||
# print(f"Models: {MODEL_NAMES}")
|
||
# print(f"Iterations per model: {NUM_ITERATIONS}")
|
||
#
|
||
# all_model_summaries = []
|
||
#
|
||
# for model_name in MODEL_NAMES:
|
||
# safe_model = safe_dir_name(model_name)
|
||
# model_dir = run_root / safe_model
|
||
# model_dir.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# print(f"\n{'#' * 80}")
|
||
# print(f"MODEL: {model_name}")
|
||
# print(f"Saving to: {model_dir}")
|
||
# print(f"{'#' * 80}")
|
||
#
|
||
# model_records = []
|
||
# model_start = time.perf_counter()
|
||
#
|
||
# for iteration in range(1, NUM_ITERATIONS + 1):
|
||
# print(f"\n{'=' * 60}")
|
||
# print(f"🔄 MODEL {model_name} | ITERATION {iteration}/{NUM_ITERATIONS}")
|
||
# print(f"{'=' * 60}")
|
||
#
|
||
# iteration_results = []
|
||
# iteration_start = time.perf_counter()
|
||
#
|
||
# for idx, row in df.iterrows():
|
||
# print(
|
||
# f"\rModel={model_name} | Row {idx + 1}/{total_rows} | Iter {iteration}",
|
||
# end="",
|
||
# flush=True
|
||
# )
|
||
#
|
||
# try:
|
||
# patient_text = build_patient_text(row)
|
||
# record = run_inference(patient_text, model_name=model_name)
|
||
#
|
||
# record["iteration"] = iteration
|
||
# record["row_index"] = int(idx)
|
||
# record["unique_id"] = row.get("unique_id", f"row_{idx}")
|
||
# record["MedDatum"] = row.get("MedDatum", None)
|
||
#
|
||
# iteration_results.append(record)
|
||
# model_records.append(record)
|
||
#
|
||
# if record["success"]:
|
||
# res = record["result"]
|
||
# edss = res.get("EDSS", "N/A") if res.get("klassifizierbar") else "N/A"
|
||
# print(
|
||
# f" ✅ EDSS={edss}, "
|
||
# f"cert={res.get('certainty_percent', '?')}%, "
|
||
# f"time={record['inference_time_sec']:.2f}s"
|
||
# )
|
||
# else:
|
||
# print(f" ❌ {record.get('error', 'Unknown error')}")
|
||
#
|
||
# except Exception as e:
|
||
# print(f"\n⚠️ Row {idx} failed outside inference wrapper: {e}")
|
||
#
|
||
# fallback_record = {
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "result": None,
|
||
#
|
||
# "model": model_name,
|
||
# "iteration": iteration,
|
||
# "row_index": int(idx),
|
||
# "unique_id": row.get("unique_id", f"row_{idx}"),
|
||
# "MedDatum": row.get("MedDatum", None),
|
||
#
|
||
# "inference_time_sec": None,
|
||
# "process_cpu_time_sec": None,
|
||
# "rss_before_mb": None,
|
||
# "rss_after_mb": None,
|
||
# "rss_delta_mb": None,
|
||
# "peak_rss_mb": None,
|
||
#
|
||
# "prompt_tokens": None,
|
||
# "completion_tokens": None,
|
||
# "total_tokens": None,
|
||
# }
|
||
#
|
||
# iteration_results.append(fallback_record)
|
||
# model_records.append(fallback_record)
|
||
#
|
||
# if STOP_ON_FIRST_ERROR:
|
||
# break
|
||
#
|
||
# iteration_elapsed = time.perf_counter() - iteration_start
|
||
#
|
||
# # Save per-iteration JSON
|
||
# iter_json_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}.json"
|
||
# with open(iter_json_path, "w", encoding="utf-8") as f:
|
||
# json.dump(iteration_results, f, indent=2, ensure_ascii=False)
|
||
#
|
||
# # Save per-iteration CSV
|
||
# iter_csv_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}.csv"
|
||
# iter_flat_df = pd.DataFrame([flatten_result(r) for r in iteration_results])
|
||
# iter_flat_df.to_csv(iter_csv_path, index=False)
|
||
#
|
||
# print(f"\n✅ Iteration {iteration} complete.")
|
||
# print(f"JSON saved to: {iter_json_path}")
|
||
# print(f"CSV saved to: {iter_csv_path}")
|
||
# print(
|
||
# f"⏱️ Iteration time: {iteration_elapsed:.1f}s "
|
||
# f"({iteration_elapsed / max(total_rows, 1):.2f}s/row)"
|
||
# )
|
||
#
|
||
# model_elapsed = time.perf_counter() - model_start
|
||
#
|
||
# # Save all records for this model
|
||
# model_json_path = model_dir / f"{safe_model}_all_results_{run_timestamp}.json"
|
||
# with open(model_json_path, "w", encoding="utf-8") as f:
|
||
# json.dump(model_records, f, indent=2, ensure_ascii=False)
|
||
#
|
||
# model_csv_path = model_dir / f"{safe_model}_all_results_{run_timestamp}.csv"
|
||
# model_flat_df = pd.DataFrame([flatten_result(r) for r in model_records])
|
||
# model_flat_df.to_csv(model_csv_path, index=False)
|
||
#
|
||
# # Save model summary
|
||
# model_summary_df = summarize_records(model_records)
|
||
# model_summary_df["model_total_wall_time_sec"] = model_elapsed
|
||
# model_summary_df["model_total_wall_time_min"] = model_elapsed / 60
|
||
#
|
||
# model_summary_path = model_dir / f"{safe_model}_summary_{run_timestamp}.csv"
|
||
# model_summary_df.to_csv(model_summary_path, index=False)
|
||
#
|
||
# all_model_summaries.append(model_summary_df)
|
||
#
|
||
# print(f"\n🎉 Model completed: {model_name}")
|
||
# print(f"All JSON: {model_json_path}")
|
||
# print(f"All CSV: {model_csv_path}")
|
||
# print(f"Summary: {model_summary_path}")
|
||
# print(f"Total model time: {model_elapsed / 60:.2f} min")
|
||
#
|
||
# # Save combined model summaries
|
||
# if all_model_summaries:
|
||
# combined_summary_df = pd.concat(all_model_summaries, ignore_index=True)
|
||
# combined_summary_path = run_root / f"all_models_summary_{run_timestamp}.csv"
|
||
# combined_summary_df.to_csv(combined_summary_path, index=False)
|
||
#
|
||
# print(f"\n📊 Combined summary saved to: {combined_summary_path}")
|
||
#
|
||
# print(f"\n🎉 All models and all iterations completed!")
|
||
#
|
||
##
|
||
|
||
|
||
|
||
|
||
|
||
|
||
# %% API call - Multi-model, multi-iteration EDSS + timing/resource benchmark
|
||
|
||
#import time
|
||
#import json
|
||
#import os
|
||
#import re
|
||
#import threading
|
||
#from datetime import datetime
|
||
#from pathlib import Path
|
||
#
|
||
#import pandas as pd
|
||
#from openai import OpenAI
|
||
#from dotenv import load_dotenv
|
||
#
|
||
#try:
|
||
# import psutil
|
||
#except ImportError:
|
||
# psutil = None
|
||
# print("⚠️ psutil is not installed. Resource metrics will be limited.")
|
||
# print("Install with: pip install psutil")
|
||
#
|
||
#
|
||
# =========================
|
||
# CONFIGURATION
|
||
# =========================
|
||
#
|
||
#load_dotenv()
|
||
#
|
||
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||
#
|
||
#MODEL_CONFIGS = [
|
||
# {
|
||
# "model_name": "qwen3.6-35b-a3b",
|
||
# "use_response_format": False,
|
||
# "temperature": 0.0,
|
||
# "max_tokens": 4096,
|
||
#
|
||
# # If your backend is vLLM / Qwen chat-template compatible,
|
||
# # this may reduce long hidden reasoning and JSON truncation.
|
||
# # If your server errors because of extra_body, set this to None.
|
||
# "extra_body": {
|
||
# "chat_template_kwargs": {
|
||
# "enable_thinking": False
|
||
# }
|
||
# },
|
||
# },
|
||
# {
|
||
# "model_name": "gemma-4-31B-it",
|
||
# "use_response_format": False,
|
||
# "temperature": 0.0,
|
||
# "max_tokens": 4096,
|
||
# "extra_body": None,
|
||
# },
|
||
# # {
|
||
# # "model_name": "GPT-OSS-120B",
|
||
# # "use_response_format": True,
|
||
# # "temperature": 0.0,
|
||
# # "max_tokens": 4096,
|
||
# # "extra_body": None,
|
||
# # },
|
||
#]
|
||
#
|
||
#INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||
#EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
|
||
#
|
||
#RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results_edss_benchmark"
|
||
#
|
||
#NUM_ITERATIONS = 10
|
||
#STOP_ON_FIRST_ERROR = False
|
||
#
|
||
# For testing, set to e.g. 2.
|
||
# For full run, set to None.
|
||
#MAX_ROWS = 2
|
||
# MAX_ROWS = 2
|
||
#
|
||
#MAX_TOKENS = 4096
|
||
#TEMPERATURE = 0.0
|
||
#
|
||
#RESOURCE_SAMPLE_INTERVAL_SEC = 0.05
|
||
#
|
||
#SAVE_EVERY_N_ROWS = 1
|
||
#
|
||
# Retries for invalid JSON / truncated JSON
|
||
#MAX_JSON_RETRIES = 2
|
||
#RETRY_SLEEP_SEC = 2
|
||
#
|
||
#
|
||
# =========================
|
||
# CLIENT
|
||
# =========================
|
||
#
|
||
#client = OpenAI(
|
||
# api_key=OPENAI_API_KEY,
|
||
# base_url=OPENAI_BASE_URL
|
||
#)
|
||
#
|
||
#
|
||
# =========================
|
||
# HELPERS
|
||
# =========================
|
||
#
|
||
#def safe_dir_name(name: str) -> str:
|
||
# name = str(name).strip()
|
||
# name = re.sub(r"[^\w\-.]+", "_", name)
|
||
# return name[:150]
|
||
#
|
||
#
|
||
#def now_timestamp() -> str:
|
||
# return datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
#
|
||
#
|
||
#def get_process():
|
||
# if psutil is None:
|
||
# return None
|
||
# return psutil.Process(os.getpid())
|
||
#
|
||
#
|
||
#def get_memory_rss_mb(process=None):
|
||
# if psutil is None:
|
||
# return None
|
||
# if process is None:
|
||
# process = get_process()
|
||
# return process.memory_info().rss / (1024 * 1024)
|
||
#
|
||
#
|
||
#def get_cpu_times_sec(process=None):
|
||
# if psutil is None:
|
||
# return None
|
||
# if process is None:
|
||
# process = get_process()
|
||
# cpu_times = process.cpu_times()
|
||
# return cpu_times.user + cpu_times.system
|
||
#
|
||
#
|
||
#class ResourceSampler:
|
||
# def __init__(self, interval_sec=0.05):
|
||
# self.interval_sec = interval_sec
|
||
# self.process = get_process()
|
||
# self.running = False
|
||
# self.thread = None
|
||
# self.samples_mb = []
|
||
#
|
||
# def start(self):
|
||
# if psutil is None:
|
||
# return
|
||
#
|
||
# self.running = True
|
||
# self.samples_mb = []
|
||
# self.thread = threading.Thread(target=self._sample_loop, daemon=True)
|
||
# self.thread.start()
|
||
#
|
||
# def stop(self):
|
||
# if psutil is None:
|
||
# return
|
||
#
|
||
# self.running = False
|
||
# if self.thread is not None:
|
||
# self.thread.join(timeout=1.0)
|
||
#
|
||
# def _sample_loop(self):
|
||
# while self.running:
|
||
# try:
|
||
# rss_mb = get_memory_rss_mb(self.process)
|
||
# self.samples_mb.append(rss_mb)
|
||
# except Exception:
|
||
# pass
|
||
# time.sleep(self.interval_sec)
|
||
#
|
||
# @property
|
||
# def peak_rss_mb(self):
|
||
# if not self.samples_mb:
|
||
# return None
|
||
# return max(self.samples_mb)
|
||
#
|
||
#
|
||
# =========================
|
||
# JSON EXTRACTION
|
||
# =========================
|
||
#
|
||
#def extract_json_from_text(text):
|
||
# if text is None:
|
||
# raise ValueError("Model returned empty content: message.content is None")
|
||
#
|
||
# text = str(text).strip()
|
||
#
|
||
# if not text:
|
||
# raise ValueError("Model returned empty content")
|
||
#
|
||
# text = (
|
||
# text.replace("```json", "")
|
||
# .replace("```JSON", "")
|
||
# .replace("```Json", "")
|
||
# .replace("```", "")
|
||
# .strip()
|
||
# )
|
||
#
|
||
# # Direct parse
|
||
# try:
|
||
# parsed = json.loads(text)
|
||
# if isinstance(parsed, dict):
|
||
# return parsed
|
||
# except json.JSONDecodeError:
|
||
# pass
|
||
#
|
||
# # Balanced JSON candidates
|
||
# candidates = []
|
||
# stack = []
|
||
# start_idx = None
|
||
# in_string = False
|
||
# escape = False
|
||
#
|
||
# for i, ch in enumerate(text):
|
||
# if escape:
|
||
# escape = False
|
||
# continue
|
||
#
|
||
# if ch == "\\":
|
||
# escape = True
|
||
# continue
|
||
#
|
||
# if ch == '"':
|
||
# in_string = not in_string
|
||
# continue
|
||
#
|
||
# if in_string:
|
||
# continue
|
||
#
|
||
# if ch == "{":
|
||
# if not stack:
|
||
# start_idx = i
|
||
# stack.append(ch)
|
||
#
|
||
# elif ch == "}":
|
||
# if stack:
|
||
# stack.pop()
|
||
# if not stack and start_idx is not None:
|
||
# candidates.append(text[start_idx:i + 1])
|
||
# start_idx = None
|
||
#
|
||
# valid_objects = []
|
||
#
|
||
# for candidate in candidates:
|
||
# candidate = candidate.strip()
|
||
# lowered = candidate.lower()
|
||
#
|
||
# invalid_markers = [
|
||
# "true/false",
|
||
# "null or",
|
||
# "oder zahl",
|
||
# "0.0-6.0",
|
||
# "0.0-10.0",
|
||
# "zahl zwischen",
|
||
# "...",
|
||
# ]
|
||
#
|
||
# if any(marker in lowered for marker in invalid_markers):
|
||
# continue
|
||
#
|
||
# try:
|
||
# parsed = json.loads(candidate)
|
||
# if isinstance(parsed, dict):
|
||
# valid_objects.append(parsed)
|
||
# except json.JSONDecodeError:
|
||
# continue
|
||
#
|
||
# for obj in reversed(valid_objects):
|
||
# if (
|
||
# "klassifizierbar" in obj
|
||
# and "certainty_percent" in obj
|
||
# and "subcategories" in obj
|
||
# ):
|
||
# return obj
|
||
#
|
||
# if valid_objects:
|
||
# return valid_objects[-1]
|
||
#
|
||
# # Check if it looks like truncated JSON
|
||
# stripped = text.strip()
|
||
# if stripped.startswith("{") and not stripped.endswith("}"):
|
||
# raise ValueError(
|
||
# "Model output looks like truncated JSON. "
|
||
# f"Raw output starts with: {text[:1000]}"
|
||
# )
|
||
#
|
||
# raise ValueError(
|
||
# "No valid JSON object found in model output. "
|
||
# f"Raw output starts with: {text[:1000]}"
|
||
# )
|
||
#
|
||
#
|
||
#def extract_message_content(message):
|
||
# raw_content = getattr(message, "content", None)
|
||
#
|
||
# if raw_content is not None:
|
||
# return raw_content
|
||
#
|
||
# msg_dict = None
|
||
#
|
||
# try:
|
||
# msg_dict = message.model_dump()
|
||
# except Exception:
|
||
# try:
|
||
# msg_dict = dict(message)
|
||
# except Exception:
|
||
# msg_dict = None
|
||
#
|
||
# if not isinstance(msg_dict, dict):
|
||
# return None
|
||
#
|
||
# for key in ["content", "reasoning_content", "reasoning", "text", "output_text"]:
|
||
# value = msg_dict.get(key)
|
||
# if value:
|
||
# return value
|
||
#
|
||
# possible_content = msg_dict.get("content")
|
||
# if isinstance(possible_content, list):
|
||
# parts = []
|
||
# for block in possible_content:
|
||
# if isinstance(block, dict):
|
||
# if "text" in block:
|
||
# parts.append(str(block["text"]))
|
||
# elif "content" in block:
|
||
# parts.append(str(block["content"]))
|
||
# if parts:
|
||
# return "\n".join(parts).strip()
|
||
#
|
||
# return None
|
||
#
|
||
#
|
||
# =========================
|
||
# READ INSTRUCTIONS
|
||
# =========================
|
||
#
|
||
#with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
|
||
# EDSS_INSTRUCTIONS = f.read().strip()
|
||
#
|
||
#
|
||
# =========================
|
||
# PROMPT
|
||
# =========================
|
||
#
|
||
#def build_prompt(patient_text):
|
||
# return f'''Du bist ein medizinischer Assistent für EDSS-Extraktion aus klinischen Berichten.
|
||
#
|
||
#Extrahiere:
|
||
#1. Gesamt-EDSS-Score von 0.0 bis 10.0
|
||
#2. Alle 8 EDSS-Unterkategorien
|
||
#3. Sicherheit als Ganzzahl von 0 bis 100
|
||
#
|
||
#Antworte ausschließlich mit EINEM validen JSON-Objekt.
|
||
#Kein Markdown.
|
||
#Keine Code-Fences.
|
||
#Kein Text vor oder nach JSON.
|
||
#Keine Platzhalter.
|
||
#Kopiere kein Schema.
|
||
#
|
||
#Das JSON muss exakt diese Schlüssel enthalten:
|
||
#- reason
|
||
#- klassifizierbar
|
||
#- EDSS
|
||
#- certainty_percent
|
||
#- subcategories
|
||
#
|
||
#Die subcategories müssen exakt diese 8 Schlüssel enthalten:
|
||
#- VISUAL_OPTIC_FUNCTIONS
|
||
#- BRAINSTEM_FUNCTIONS
|
||
#- PYRAMIDAL_FUNCTIONS
|
||
#- CEREBELLAR_FUNCTIONS
|
||
#- SENSORY_FUNCTIONS
|
||
#- BOWEL_AND_BLADDER_FUNCTIONS
|
||
#- CEREBRAL_FUNCTIONS
|
||
#- AMBULATION
|
||
#
|
||
#Werte:
|
||
#- klassifizierbar: true oder false
|
||
#- EDSS: Zahl von 0.0 bis 10.0 oder null
|
||
#- certainty_percent: Ganzzahl von 0 bis 100
|
||
#- Unterkategorien: Zahl oder null
|
||
#- VISUAL_OPTIC_FUNCTIONS maximal 6.0
|
||
#- BRAINSTEM_FUNCTIONS maximal 6.0
|
||
#- PYRAMIDAL_FUNCTIONS maximal 6.0
|
||
#- CEREBELLAR_FUNCTIONS maximal 6.0
|
||
#- SENSORY_FUNCTIONS maximal 6.0
|
||
#- BOWEL_AND_BLADDER_FUNCTIONS maximal 6.0
|
||
#- CEREBRAL_FUNCTIONS maximal 6.0
|
||
#- AMBULATION maximal 10.0
|
||
#- reason: maximal 250 Zeichen, Deutsch
|
||
#
|
||
#Wenn klassifizierbar false ist, setze EDSS auf null.
|
||
#
|
||
#Valide Beispielausgabe:
|
||
#{{
|
||
# "reason": "Leichte Einschränkungen mit sicher ableitbarer Gehfähigkeit und geringen funktionellen Defiziten.",
|
||
# "klassifizierbar": true,
|
||
# "EDSS": 2.0,
|
||
# "certainty_percent": 90,
|
||
# "subcategories": {{
|
||
# "VISUAL_OPTIC_FUNCTIONS": null,
|
||
# "BRAINSTEM_FUNCTIONS": null,
|
||
# "PYRAMIDAL_FUNCTIONS": 1.0,
|
||
# "CEREBELLAR_FUNCTIONS": 1.0,
|
||
# "SENSORY_FUNCTIONS": 1.0,
|
||
# "BOWEL_AND_BLADDER_FUNCTIONS": null,
|
||
# "CEREBRAL_FUNCTIONS": null,
|
||
# "AMBULATION": 0.0
|
||
# }}
|
||
#}}
|
||
#
|
||
#EDSS-Bewertungsrichtlinien:
|
||
#{EDSS_INSTRUCTIONS}
|
||
#
|
||
#Patientenbericht:
|
||
#{patient_text}
|
||
#
|
||
#Gib ausschließlich das finale JSON-Objekt zurück.
|
||
#'''
|
||
#
|
||
#
|
||
# =========================
|
||
# VALIDATION / NORMALIZATION
|
||
# =========================
|
||
#
|
||
#def normalize_model_output(parsed):
|
||
# if not isinstance(parsed, dict):
|
||
# raise ValueError("Parsed model output is not a JSON object")
|
||
#
|
||
# if "certainty_percent" not in parsed:
|
||
# parsed["certainty_percent"] = 0
|
||
# elif not isinstance(parsed["certainty_percent"], (int, float)):
|
||
# try:
|
||
# parsed["certainty_percent"] = int(parsed["certainty_percent"])
|
||
# except Exception:
|
||
# parsed["certainty_percent"] = 0
|
||
#
|
||
# parsed["certainty_percent"] = max(0, min(100, int(parsed["certainty_percent"])))
|
||
#
|
||
# if "klassifizierbar" not in parsed:
|
||
# parsed["klassifizierbar"] = False
|
||
#
|
||
# if not isinstance(parsed["klassifizierbar"], bool):
|
||
# if str(parsed["klassifizierbar"]).lower() in ["true", "1", "yes", "ja"]:
|
||
# parsed["klassifizierbar"] = True
|
||
# else:
|
||
# parsed["klassifizierbar"] = False
|
||
#
|
||
# if not parsed.get("klassifizierbar", False):
|
||
# parsed["EDSS"] = None
|
||
# else:
|
||
# if "EDSS" not in parsed or parsed["EDSS"] is None:
|
||
# parsed["EDSS"] = 7.0
|
||
# else:
|
||
# try:
|
||
# parsed["EDSS"] = float(parsed["EDSS"])
|
||
# parsed["EDSS"] = max(0.0, min(10.0, parsed["EDSS"]))
|
||
# except Exception:
|
||
# parsed["EDSS"] = 7.0
|
||
#
|
||
# required_subcategories = {
|
||
# "VISUAL_OPTIC_FUNCTIONS": 6.0,
|
||
# "BRAINSTEM_FUNCTIONS": 6.0,
|
||
# "PYRAMIDAL_FUNCTIONS": 6.0,
|
||
# "CEREBELLAR_FUNCTIONS": 6.0,
|
||
# "SENSORY_FUNCTIONS": 6.0,
|
||
# "BOWEL_AND_BLADDER_FUNCTIONS": 6.0,
|
||
# "CEREBRAL_FUNCTIONS": 6.0,
|
||
# "AMBULATION": 10.0,
|
||
# }
|
||
#
|
||
# if "subcategories" not in parsed or not isinstance(parsed["subcategories"], dict):
|
||
# parsed["subcategories"] = {}
|
||
#
|
||
# for key, max_value in required_subcategories.items():
|
||
# value = parsed["subcategories"].get(key, None)
|
||
#
|
||
# if value is None:
|
||
# parsed["subcategories"][key] = None
|
||
# else:
|
||
# try:
|
||
# value = float(value)
|
||
# value = max(0.0, min(max_value, value))
|
||
# parsed["subcategories"][key] = value
|
||
# except Exception:
|
||
# parsed["subcategories"][key] = None
|
||
#
|
||
# parsed["subcategories"] = {
|
||
# key: parsed["subcategories"].get(key, None)
|
||
# for key in required_subcategories.keys()
|
||
# }
|
||
#
|
||
# if "reason" not in parsed or parsed["reason"] is None:
|
||
# parsed["reason"] = ""
|
||
#
|
||
# parsed["reason"] = str(parsed["reason"])[:250]
|
||
#
|
||
# return parsed
|
||
#
|
||
#
|
||
# =========================
|
||
# API CALL
|
||
# =========================
|
||
#
|
||
#def make_chat_completion(model_config, prompt):
|
||
# model_name = model_config["model_name"]
|
||
#
|
||
# kwargs = dict(
|
||
# messages=[
|
||
# {
|
||
# "role": "system",
|
||
# "content": (
|
||
# "Du bist ein JSON-Generator. "
|
||
# "Antworte ausschließlich mit einem einzigen validen JSON-Objekt. "
|
||
# "Keine Erklärung. Kein Markdown. Keine Code-Fences. "
|
||
# "Keine Platzhalter. Kein Schema kopieren. "
|
||
# "Das JSON muss vollständig geschlossen sein."
|
||
# )
|
||
# },
|
||
# {
|
||
# "role": "user",
|
||
# "content": prompt
|
||
# }
|
||
# ],
|
||
# model=model_name,
|
||
# max_tokens=model_config.get("max_tokens", MAX_TOKENS),
|
||
# temperature=model_config.get("temperature", TEMPERATURE),
|
||
# )
|
||
#
|
||
# if model_config.get("use_response_format", False):
|
||
# kwargs["response_format"] = {"type": "json_object"}
|
||
#
|
||
# extra_body = model_config.get("extra_body")
|
||
# if extra_body is not None:
|
||
# kwargs["extra_body"] = extra_body
|
||
#
|
||
# return client.chat.completions.create(**kwargs)
|
||
#
|
||
#
|
||
# =========================
|
||
# INFERENCE FUNCTION WITH RETRIES
|
||
# =========================
|
||
#
|
||
#def run_inference(patient_text, model_config):
|
||
# model_name = model_config["model_name"]
|
||
# prompt = build_prompt(patient_text)
|
||
#
|
||
# process = get_process()
|
||
# sampler = ResourceSampler(interval_sec=RESOURCE_SAMPLE_INTERVAL_SEC)
|
||
#
|
||
# wall_start = time.perf_counter()
|
||
# cpu_start = get_cpu_times_sec(process)
|
||
# rss_start_mb = get_memory_rss_mb(process)
|
||
#
|
||
# sampler.start()
|
||
#
|
||
# raw_content = None
|
||
# raw_response_debug = None
|
||
# last_error = None
|
||
# prompt_tokens = None
|
||
# completion_tokens = None
|
||
# total_tokens = None
|
||
#
|
||
# try:
|
||
# for attempt in range(1, MAX_JSON_RETRIES + 2):
|
||
# try:
|
||
# response = make_chat_completion(
|
||
# model_config=model_config,
|
||
# prompt=prompt
|
||
# )
|
||
#
|
||
# message = response.choices[0].message
|
||
# raw_content = extract_message_content(message)
|
||
#
|
||
# try:
|
||
# raw_response_debug = response.model_dump()
|
||
# except Exception:
|
||
# raw_response_debug = str(response)
|
||
#
|
||
# usage = getattr(response, "usage", None)
|
||
# if usage is not None:
|
||
# prompt_tokens = getattr(usage, "prompt_tokens", None)
|
||
# completion_tokens = getattr(usage, "completion_tokens", None)
|
||
# total_tokens = getattr(usage, "total_tokens", None)
|
||
#
|
||
# parsed = extract_json_from_text(raw_content)
|
||
# parsed = normalize_model_output(parsed)
|
||
#
|
||
# success = True
|
||
# error = None
|
||
# result = parsed
|
||
# break
|
||
#
|
||
# except Exception as e:
|
||
# last_error = str(e)
|
||
#
|
||
# if attempt <= MAX_JSON_RETRIES:
|
||
# print(
|
||
# f"\n⚠️ JSON failed on attempt {attempt}. "
|
||
# f"Retrying row. Error: {last_error[:300]}"
|
||
# )
|
||
# time.sleep(RETRY_SLEEP_SEC)
|
||
# continue
|
||
#
|
||
# raise
|
||
#
|
||
# except Exception as e:
|
||
# print(f"❌ Inference error: {e}")
|
||
#
|
||
# success = False
|
||
# error = str(e)
|
||
# result = None
|
||
#
|
||
# finally:
|
||
# sampler.stop()
|
||
#
|
||
# wall_end = time.perf_counter()
|
||
# cpu_end = get_cpu_times_sec(process)
|
||
# rss_end_mb = get_memory_rss_mb(process)
|
||
#
|
||
# wall_time_sec = wall_end - wall_start
|
||
#
|
||
# if cpu_start is not None and cpu_end is not None:
|
||
# process_cpu_time_sec = cpu_end - cpu_start
|
||
# else:
|
||
# process_cpu_time_sec = None
|
||
#
|
||
# if rss_start_mb is not None and rss_end_mb is not None:
|
||
# rss_delta_mb = rss_end_mb - rss_start_mb
|
||
# else:
|
||
# rss_delta_mb = None
|
||
#
|
||
# return {
|
||
# "success": success,
|
||
# "error": error,
|
||
# "result": result,
|
||
#
|
||
# "model": model_name,
|
||
#
|
||
# "inference_time_sec": wall_time_sec,
|
||
#
|
||
# "process_cpu_time_sec": process_cpu_time_sec,
|
||
# "rss_before_mb": rss_start_mb,
|
||
# "rss_after_mb": rss_end_mb,
|
||
# "rss_delta_mb": rss_delta_mb,
|
||
# "peak_rss_mb": sampler.peak_rss_mb,
|
||
#
|
||
# "prompt_tokens": prompt_tokens,
|
||
# "completion_tokens": completion_tokens,
|
||
# "total_tokens": total_tokens,
|
||
#
|
||
# "raw_content": raw_content if not success else None,
|
||
# "raw_response_debug": raw_response_debug if not success else None,
|
||
# "last_error": last_error,
|
||
# }
|
||
#
|
||
#
|
||
# =========================
|
||
# BUILD PATIENT TEXT
|
||
# =========================
|
||
#
|
||
#def build_patient_text(row):
|
||
# return (
|
||
# str(row.get("T_Zusammenfassung", "")) + "\n" +
|
||
# str(row.get("Diagnosen", "")) + "\n" +
|
||
# str(row.get("T_KlinBef", "")) + "\n" +
|
||
# str(row.get("T_Befunde", ""))
|
||
# )
|
||
#
|
||
#
|
||
# =========================
|
||
# FLATTEN RESULTS FOR CSV
|
||
# =========================
|
||
#
|
||
#def flatten_result(record):
|
||
# flat = {
|
||
# "model": record.get("model"),
|
||
# "iteration": record.get("iteration"),
|
||
# "row_index": record.get("row_index"),
|
||
# "row_number_in_run": record.get("row_number_in_run"),
|
||
# "unique_id": record.get("unique_id"),
|
||
# "MedDatum": record.get("MedDatum"),
|
||
#
|
||
# "success": record.get("success"),
|
||
# "error": record.get("error"),
|
||
# "last_error": record.get("last_error"),
|
||
#
|
||
# "inference_time_sec": record.get("inference_time_sec"),
|
||
# "process_cpu_time_sec": record.get("process_cpu_time_sec"),
|
||
# "rss_before_mb": record.get("rss_before_mb"),
|
||
# "rss_after_mb": record.get("rss_after_mb"),
|
||
# "rss_delta_mb": record.get("rss_delta_mb"),
|
||
# "peak_rss_mb": record.get("peak_rss_mb"),
|
||
#
|
||
# "prompt_tokens": record.get("prompt_tokens"),
|
||
# "completion_tokens": record.get("completion_tokens"),
|
||
# "total_tokens": record.get("total_tokens"),
|
||
#
|
||
# "raw_content": record.get("raw_content"),
|
||
# }
|
||
#
|
||
# result = record.get("result")
|
||
#
|
||
# if isinstance(result, dict):
|
||
# flat["reason"] = result.get("reason")
|
||
# flat["klassifizierbar"] = result.get("klassifizierbar")
|
||
# flat["EDSS"] = result.get("EDSS")
|
||
# flat["certainty_percent"] = result.get("certainty_percent")
|
||
#
|
||
# subcats = result.get("subcategories", {})
|
||
# if isinstance(subcats, dict):
|
||
# for key, value in subcats.items():
|
||
# flat[f"subcat_{key}"] = value
|
||
#
|
||
# return flat
|
||
#
|
||
#
|
||
#def summarize_records(records):
|
||
# df = pd.DataFrame([flatten_result(r) for r in records])
|
||
#
|
||
# if df.empty:
|
||
# return pd.DataFrame()
|
||
#
|
||
# success_series = df["success"].fillna(False).astype(bool)
|
||
#
|
||
# summary = {
|
||
# "model": df["model"].iloc[0] if "model" in df.columns else None,
|
||
# "n_records": len(df),
|
||
# "n_success": int(success_series.sum()),
|
||
# "n_failed": int((~success_series).sum()),
|
||
# "success_rate": float(success_series.mean()),
|
||
# }
|
||
#
|
||
# numeric_cols = [
|
||
# "inference_time_sec",
|
||
# "process_cpu_time_sec",
|
||
# "rss_delta_mb",
|
||
# "peak_rss_mb",
|
||
# "prompt_tokens",
|
||
# "completion_tokens",
|
||
# "total_tokens",
|
||
# "certainty_percent",
|
||
# "EDSS",
|
||
# ]
|
||
#
|
||
# for col in numeric_cols:
|
||
# if col in df.columns:
|
||
# values = pd.to_numeric(df[col], errors="coerce")
|
||
# summary[f"{col}_mean"] = values.mean()
|
||
# summary[f"{col}_median"] = values.median()
|
||
# summary[f"{col}_std"] = values.std()
|
||
# summary[f"{col}_min"] = values.min()
|
||
# summary[f"{col}_max"] = values.max()
|
||
#
|
||
# return pd.DataFrame([summary])
|
||
#
|
||
#
|
||
# =========================
|
||
# INCREMENTAL SAVE HELPERS
|
||
# =========================
|
||
#
|
||
#def append_jsonl(path, record):
|
||
# with open(path, "a", encoding="utf-8") as f:
|
||
# f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
||
# f.flush()
|
||
# os.fsync(f.fileno())
|
||
#
|
||
#
|
||
#def append_csv(path, record):
|
||
# flat = flatten_result(record)
|
||
# df_one = pd.DataFrame([flat])
|
||
# file_exists = Path(path).exists()
|
||
# df_one.to_csv(path, mode="a", header=not file_exists, index=False)
|
||
#
|
||
#
|
||
# =========================
|
||
# MAIN LOOP
|
||
# =========================
|
||
#
|
||
#if __name__ == "__main__":
|
||
#
|
||
# run_timestamp = now_timestamp()
|
||
#
|
||
# results_root = Path(RESULTS_ROOT)
|
||
# results_root.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# run_root = results_root / f"run_{run_timestamp}"
|
||
# run_root.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# print(f"Results root: {run_root}")
|
||
#
|
||
# df = pd.read_csv(INPUT_CSV, sep=";")
|
||
#
|
||
# if MAX_ROWS is not None:
|
||
# df = df.head(MAX_ROWS)
|
||
#
|
||
# total_rows = len(df)
|
||
#
|
||
# model_names_for_print = [m["model_name"] for m in MODEL_CONFIGS]
|
||
#
|
||
# print(f"Loaded {total_rows} patient records.")
|
||
# print(f"Models: {model_names_for_print}")
|
||
# print(f"Iterations per model: {NUM_ITERATIONS}")
|
||
#
|
||
# all_model_summaries = []
|
||
#
|
||
# for model_config in MODEL_CONFIGS:
|
||
# model_name = model_config["model_name"]
|
||
# safe_model = safe_dir_name(model_name)
|
||
#
|
||
# model_dir = run_root / safe_model
|
||
# model_dir.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# print(f"\n{'#' * 80}")
|
||
# print(f"MODEL: {model_name}")
|
||
# print(f"use_response_format: {model_config.get('use_response_format', False)}")
|
||
# print(f"temperature: {model_config.get('temperature', TEMPERATURE)}")
|
||
# print(f"max_tokens: {model_config.get('max_tokens', MAX_TOKENS)}")
|
||
# print(f"Saving to: {model_dir}")
|
||
# print(f"{'#' * 80}")
|
||
#
|
||
# model_records = []
|
||
# model_start = time.perf_counter()
|
||
#
|
||
# for iteration in range(1, NUM_ITERATIONS + 1):
|
||
# print(f"\n{'=' * 60}")
|
||
# print(f"🔄 MODEL {model_name} | ITERATION {iteration}/{NUM_ITERATIONS}")
|
||
# print(f"{'=' * 60}")
|
||
#
|
||
# iteration_results = []
|
||
# iteration_start = time.perf_counter()
|
||
#
|
||
# incremental_jsonl_path = model_dir / f"{safe_model}_iter_{iteration}_{run_timestamp}_incremental.jsonl"
|
||
# incremental_csv_path = model_dir / f"{safe_model}_iter_{iteration}_{run_timestamp}_incremental.csv"
|
||
#
|
||
# print(f"Incremental JSONL: {incremental_jsonl_path}")
|
||
# print(f"Incremental CSV: {incremental_csv_path}")
|
||
#
|
||
# for loop_i, (idx, row) in enumerate(df.iterrows(), start=1):
|
||
# print(
|
||
# f"\rModel={model_name} | Row {loop_i}/{total_rows} | Iter {iteration}",
|
||
# end="",
|
||
# flush=True
|
||
# )
|
||
#
|
||
# try:
|
||
# patient_text = build_patient_text(row)
|
||
#
|
||
# record = run_inference(
|
||
# patient_text=patient_text,
|
||
# model_config=model_config
|
||
# )
|
||
#
|
||
# record["iteration"] = iteration
|
||
# record["row_index"] = int(idx)
|
||
# record["row_number_in_run"] = int(loop_i)
|
||
# record["unique_id"] = row.get("unique_id", f"row_{idx}")
|
||
# record["MedDatum"] = row.get("MedDatum", None)
|
||
#
|
||
# iteration_results.append(record)
|
||
# model_records.append(record)
|
||
#
|
||
# if loop_i % SAVE_EVERY_N_ROWS == 0:
|
||
# append_jsonl(incremental_jsonl_path, record)
|
||
# append_csv(incremental_csv_path, record)
|
||
#
|
||
# if record["success"]:
|
||
# res = record["result"]
|
||
# edss = res.get("EDSS", "N/A") if res.get("klassifizierbar") else "N/A"
|
||
# print(
|
||
# f" ✅ EDSS={edss}, "
|
||
# f"cert={res.get('certainty_percent', '?')}%, "
|
||
# f"time={record['inference_time_sec']:.2f}s"
|
||
# )
|
||
# else:
|
||
# print(f" ❌ {record.get('error', 'Unknown error')}")
|
||
#
|
||
# except Exception as e:
|
||
# print(f"\n⚠️ Row {idx} failed outside inference wrapper: {e}")
|
||
#
|
||
# fallback_record = {
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "last_error": str(e),
|
||
# "result": None,
|
||
#
|
||
# "model": model_name,
|
||
# "iteration": iteration,
|
||
# "row_index": int(idx),
|
||
# "row_number_in_run": int(loop_i),
|
||
# "unique_id": row.get("unique_id", f"row_{idx}"),
|
||
# "MedDatum": row.get("MedDatum", None),
|
||
#
|
||
# "inference_time_sec": None,
|
||
# "process_cpu_time_sec": None,
|
||
# "rss_before_mb": None,
|
||
# "rss_after_mb": None,
|
||
# "rss_delta_mb": None,
|
||
# "peak_rss_mb": None,
|
||
#
|
||
# "prompt_tokens": None,
|
||
# "completion_tokens": None,
|
||
# "total_tokens": None,
|
||
#
|
||
# "raw_content": None,
|
||
# "raw_response_debug": None,
|
||
# }
|
||
#
|
||
# iteration_results.append(fallback_record)
|
||
# model_records.append(fallback_record)
|
||
#
|
||
# append_jsonl(incremental_jsonl_path, fallback_record)
|
||
# append_csv(incremental_csv_path, fallback_record)
|
||
#
|
||
# if STOP_ON_FIRST_ERROR:
|
||
# break
|
||
#
|
||
# iteration_elapsed = time.perf_counter() - iteration_start
|
||
#
|
||
# iter_json_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}.json"
|
||
# with open(iter_json_path, "w", encoding="utf-8") as f:
|
||
# json.dump(iteration_results, f, indent=2, ensure_ascii=False)
|
||
#
|
||
# iter_csv_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}.csv"
|
||
# iter_flat_df = pd.DataFrame([flatten_result(r) for r in iteration_results])
|
||
# iter_flat_df.to_csv(iter_csv_path, index=False)
|
||
#
|
||
# print(f"\n✅ Iteration {iteration} complete.")
|
||
# print(f"Incremental JSONL saved to: {incremental_jsonl_path}")
|
||
# print(f"Incremental CSV saved to: {incremental_csv_path}")
|
||
# print(f"Final JSON saved to: {iter_json_path}")
|
||
# print(f"Final CSV saved to: {iter_csv_path}")
|
||
# print(
|
||
# f"⏱️ Iteration time: {iteration_elapsed:.1f}s "
|
||
# f"({iteration_elapsed / max(total_rows, 1):.2f}s/row)"
|
||
# )
|
||
#
|
||
# model_elapsed = time.perf_counter() - model_start
|
||
#
|
||
# model_json_path = model_dir / f"{safe_model}_all_results_{run_timestamp}.json"
|
||
# with open(model_json_path, "w", encoding="utf-8") as f:
|
||
# json.dump(model_records, f, indent=2, ensure_ascii=False)
|
||
#
|
||
# model_csv_path = model_dir / f"{safe_model}_all_results_{run_timestamp}.csv"
|
||
# model_flat_df = pd.DataFrame([flatten_result(r) for r in model_records])
|
||
# model_flat_df.to_csv(model_csv_path, index=False)
|
||
#
|
||
# model_summary_df = summarize_records(model_records)
|
||
# model_summary_df["model_total_wall_time_sec"] = model_elapsed
|
||
# model_summary_df["model_total_wall_time_min"] = model_elapsed / 60
|
||
#
|
||
# model_summary_path = model_dir / f"{safe_model}_summary_{run_timestamp}.csv"
|
||
# model_summary_df.to_csv(model_summary_path, index=False)
|
||
#
|
||
# all_model_summaries.append(model_summary_df)
|
||
#
|
||
# print(f"\n🎉 Model completed: {model_name}")
|
||
# print(f"All JSON: {model_json_path}")
|
||
# print(f"All CSV: {model_csv_path}")
|
||
# print(f"Summary: {model_summary_path}")
|
||
# print(f"Total model time: {model_elapsed / 60:.2f} min")
|
||
#
|
||
# if all_model_summaries:
|
||
# combined_summary_df = pd.concat(all_model_summaries, ignore_index=True)
|
||
# combined_summary_path = run_root / f"all_models_summary_{run_timestamp}.csv"
|
||
# combined_summary_df.to_csv(combined_summary_path, index=False)
|
||
#
|
||
# print(f"\n📊 Combined summary saved to: {combined_summary_path}")
|
||
#
|
||
# print(f"\n🎉 All models and all iterations completed!")
|
||
##
|
||
|
||
# %% API call - Multi-model, multi-iteration EDSS + timing/resource benchmark2
|
||
#
|
||
#import time
|
||
#import json
|
||
#import os
|
||
#import re
|
||
#import threading
|
||
#from datetime import datetime
|
||
#from pathlib import Path
|
||
#
|
||
#import pandas as pd
|
||
#from openai import OpenAI
|
||
#from dotenv import load_dotenv
|
||
#
|
||
#try:
|
||
# import psutil
|
||
#except ImportError:
|
||
# psutil = None
|
||
# print("⚠️ psutil is not installed. Resource metrics will be limited.")
|
||
# print("Install with: pip install psutil")
|
||
#
|
||
#
|
||
## =========================
|
||
## CONFIGURATION
|
||
## =========================
|
||
#
|
||
#load_dotenv()
|
||
#
|
||
#OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||
#OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||
#
|
||
#MODEL_CONFIGS = [
|
||
# {
|
||
# "model_name": "gpt-oss-120b",
|
||
# "use_response_format": True,
|
||
# "temperature": 0.0,
|
||
# "max_tokens": 4096,
|
||
# "extra_body": None,
|
||
# },
|
||
# {
|
||
# "model_name": "qwen3.6-27b",
|
||
# "use_response_format": False,
|
||
# "temperature": 0.0,
|
||
# "max_tokens": 4096,
|
||
# "extra_body": {
|
||
# "chat_template_kwargs": {
|
||
# "enable_thinking": False
|
||
# }
|
||
# },
|
||
# },
|
||
# {
|
||
# "model_name": "gemma-4-31B-it",
|
||
# "use_response_format": False,
|
||
# "temperature": 0.0,
|
||
# "max_tokens": 4096,
|
||
# "extra_body": None,
|
||
# },
|
||
# ]
|
||
#
|
||
#INPUT_CSV ="/home/shahin/Lab/Doktorarbeit/Barcelona/data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||
#EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/prompts/Komplett.txt"
|
||
#
|
||
#RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results/benchmark_runs"
|
||
#
|
||
#NUM_ITERATIONS = 10
|
||
#STOP_ON_FIRST_ERROR = False
|
||
#
|
||
## For testing, set to e.g. 2.
|
||
## For full run, set to None.
|
||
#MAX_ROWS = None
|
||
## MAX_ROWS = 2
|
||
#
|
||
#MAX_TOKENS = 4096
|
||
#TEMPERATURE = 0.0
|
||
#
|
||
#RESOURCE_SAMPLE_INTERVAL_SEC = 0.05
|
||
#
|
||
#SAVE_EVERY_N_ROWS = 1
|
||
#
|
||
## Retries for invalid JSON / truncated JSON
|
||
#MAX_JSON_RETRIES = 2
|
||
#RETRY_SLEEP_SEC = 2
|
||
#
|
||
#
|
||
## =========================
|
||
## VALID CLINICAL RANGES
|
||
## =========================
|
||
#
|
||
#EDSS_MIN = 0.0
|
||
#EDSS_MAX = 10.0
|
||
#
|
||
#FUNCTIONAL_SYSTEM_RANGES = {
|
||
# "VISUAL_OPTIC_FUNCTIONS": (0.0, 6.0),
|
||
# "BRAINSTEM_FUNCTIONS": (0.0, 6.0),
|
||
# "PYRAMIDAL_FUNCTIONS": (0.0, 6.0),
|
||
# "CEREBELLAR_FUNCTIONS": (0.0, 6.0),
|
||
# "SENSORY_FUNCTIONS": (0.0, 6.0),
|
||
# "BOWEL_AND_BLADDER_FUNCTIONS": (0.0, 6.0),
|
||
# "CEREBRAL_FUNCTIONS": (0.0, 6.0),
|
||
# "AMBULATION": (0.0, 10.0),
|
||
#}
|
||
#
|
||
#REQUIRED_TOP_LEVEL_FIELDS = [
|
||
# "reason",
|
||
# "klassifizierbar",
|
||
# "EDSS",
|
||
# "certainty_percent",
|
||
# "subcategories",
|
||
#]
|
||
#
|
||
#
|
||
## =========================
|
||
## CLIENT
|
||
## =========================
|
||
#
|
||
#client = OpenAI(
|
||
# api_key=OPENAI_API_KEY,
|
||
# base_url=OPENAI_BASE_URL
|
||
#)
|
||
#
|
||
#
|
||
## =========================
|
||
## HELPERS
|
||
## =========================
|
||
#
|
||
#def safe_dir_name(name: str) -> str:
|
||
# name = str(name).strip()
|
||
# name = re.sub(r"[^\w\-.]+", "_", name)
|
||
# return name[:150]
|
||
#
|
||
#
|
||
#def now_timestamp() -> str:
|
||
# return datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
#
|
||
#
|
||
#def get_process():
|
||
# if psutil is None:
|
||
# return None
|
||
# return psutil.Process(os.getpid())
|
||
#
|
||
#
|
||
#def get_memory_rss_mb(process=None):
|
||
# if psutil is None:
|
||
# return None
|
||
# if process is None:
|
||
# process = get_process()
|
||
# return process.memory_info().rss / (1024 * 1024)
|
||
#
|
||
#
|
||
#def get_cpu_times_sec(process=None):
|
||
# if psutil is None:
|
||
# return None
|
||
# if process is None:
|
||
# process = get_process()
|
||
# cpu_times = process.cpu_times()
|
||
# return cpu_times.user + cpu_times.system
|
||
#
|
||
#
|
||
#class ResourceSampler:
|
||
# def __init__(self, interval_sec=0.05):
|
||
# self.interval_sec = interval_sec
|
||
# self.process = get_process()
|
||
# self.running = False
|
||
# self.thread = None
|
||
# self.samples_mb = []
|
||
#
|
||
# def start(self):
|
||
# if psutil is None:
|
||
# return
|
||
#
|
||
# self.running = True
|
||
# self.samples_mb = []
|
||
# self.thread = threading.Thread(target=self._sample_loop, daemon=True)
|
||
# self.thread.start()
|
||
#
|
||
# def stop(self):
|
||
# if psutil is None:
|
||
# return
|
||
#
|
||
# self.running = False
|
||
# if self.thread is not None:
|
||
# self.thread.join(timeout=1.0)
|
||
#
|
||
# def _sample_loop(self):
|
||
# while self.running:
|
||
# try:
|
||
# rss_mb = get_memory_rss_mb(self.process)
|
||
# self.samples_mb.append(rss_mb)
|
||
# except Exception:
|
||
# pass
|
||
# time.sleep(self.interval_sec)
|
||
#
|
||
# @property
|
||
# def peak_rss_mb(self):
|
||
# if not self.samples_mb:
|
||
# return None
|
||
# return max(self.samples_mb)
|
||
#
|
||
#
|
||
## =========================
|
||
## JSON EXTRACTION
|
||
## =========================
|
||
#
|
||
#def extract_json_from_text(text):
|
||
# if text is None:
|
||
# raise ValueError("Model returned empty content: message.content is None")
|
||
#
|
||
# text = str(text).strip()
|
||
#
|
||
# if not text:
|
||
# raise ValueError("Model returned empty content")
|
||
#
|
||
# text = (
|
||
# text.replace("```json", "")
|
||
# .replace("```JSON", "")
|
||
# .replace("```Json", "")
|
||
# .replace("```", "")
|
||
# .strip()
|
||
# )
|
||
#
|
||
# # Direct parse
|
||
# try:
|
||
# parsed = json.loads(text)
|
||
# if isinstance(parsed, dict):
|
||
# return parsed
|
||
# except json.JSONDecodeError:
|
||
# pass
|
||
#
|
||
# # Balanced JSON candidates
|
||
# candidates = []
|
||
# stack = []
|
||
# start_idx = None
|
||
# in_string = False
|
||
# escape = False
|
||
#
|
||
# for i, ch in enumerate(text):
|
||
# if escape:
|
||
# escape = False
|
||
# continue
|
||
#
|
||
# if ch == "\\":
|
||
# escape = True
|
||
# continue
|
||
#
|
||
# if ch == '"':
|
||
# in_string = not in_string
|
||
# continue
|
||
#
|
||
# if in_string:
|
||
# continue
|
||
#
|
||
# if ch == "{":
|
||
# if not stack:
|
||
# start_idx = i
|
||
# stack.append(ch)
|
||
#
|
||
# elif ch == "}":
|
||
# if stack:
|
||
# stack.pop()
|
||
# if not stack and start_idx is not None:
|
||
# candidates.append(text[start_idx:i + 1])
|
||
# start_idx = None
|
||
#
|
||
# valid_objects = []
|
||
#
|
||
# for candidate in candidates:
|
||
# candidate = candidate.strip()
|
||
# lowered = candidate.lower()
|
||
#
|
||
# invalid_markers = [
|
||
# "true/false",
|
||
# "null or",
|
||
# "oder zahl",
|
||
# "0.0-6.0",
|
||
# "0.0-10.0",
|
||
# "zahl zwischen",
|
||
# "...",
|
||
# ]
|
||
#
|
||
# if any(marker in lowered for marker in invalid_markers):
|
||
# continue
|
||
#
|
||
# try:
|
||
# parsed = json.loads(candidate)
|
||
# if isinstance(parsed, dict):
|
||
# valid_objects.append(parsed)
|
||
# except json.JSONDecodeError:
|
||
# continue
|
||
#
|
||
# for obj in reversed(valid_objects):
|
||
# if (
|
||
# "klassifizierbar" in obj
|
||
# and "certainty_percent" in obj
|
||
# and "subcategories" in obj
|
||
# ):
|
||
# return obj
|
||
#
|
||
# if valid_objects:
|
||
# return valid_objects[-1]
|
||
#
|
||
# stripped = text.strip()
|
||
# if stripped.startswith("{") and not stripped.endswith("}"):
|
||
# raise ValueError(
|
||
# "Model output looks like truncated JSON. "
|
||
# f"Raw output starts with: {text[:1000]}"
|
||
# )
|
||
#
|
||
# raise ValueError(
|
||
# "No valid JSON object found in model output. "
|
||
# f"Raw output starts with: {text[:1000]}"
|
||
# )
|
||
#
|
||
#
|
||
#def extract_message_content(message):
|
||
# raw_content = getattr(message, "content", None)
|
||
#
|
||
# if raw_content is not None:
|
||
# return raw_content
|
||
#
|
||
# msg_dict = None
|
||
#
|
||
# try:
|
||
# msg_dict = message.model_dump()
|
||
# except Exception:
|
||
# try:
|
||
# msg_dict = dict(message)
|
||
# except Exception:
|
||
# msg_dict = None
|
||
#
|
||
# if not isinstance(msg_dict, dict):
|
||
# return None
|
||
#
|
||
# for key in ["content", "reasoning_content", "reasoning", "text", "output_text"]:
|
||
# value = msg_dict.get(key)
|
||
# if value:
|
||
# return value
|
||
#
|
||
# possible_content = msg_dict.get("content")
|
||
# if isinstance(possible_content, list):
|
||
# parts = []
|
||
# for block in possible_content:
|
||
# if isinstance(block, dict):
|
||
# if "text" in block:
|
||
# parts.append(str(block["text"]))
|
||
# elif "content" in block:
|
||
# parts.append(str(block["content"]))
|
||
# if parts:
|
||
# return "\n".join(parts).strip()
|
||
#
|
||
# return None
|
||
#
|
||
#
|
||
## =========================
|
||
## READ INSTRUCTIONS
|
||
## =========================
|
||
#
|
||
#with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
|
||
# EDSS_INSTRUCTIONS = f.read().strip()
|
||
#
|
||
#
|
||
## =========================
|
||
## PROMPT
|
||
## =========================
|
||
#
|
||
#def build_prompt(patient_text):
|
||
# return f'''Du bist ein medizinischer Assistent für EDSS-Extraktion aus klinischen Berichten.
|
||
#
|
||
#Extrahiere:
|
||
#1. Gesamt-EDSS-Score von 0.0 bis 10.0
|
||
#2. Alle 8 EDSS-Unterkategorien
|
||
#3. Sicherheit als Ganzzahl von 0 bis 100
|
||
#
|
||
#Antworte ausschließlich mit EINEM validen JSON-Objekt.
|
||
#Kein Markdown.
|
||
#Keine Code-Fences.
|
||
#Kein Text vor oder nach JSON.
|
||
#Keine Platzhalter.
|
||
#Kopiere kein Schema.
|
||
#
|
||
#Das JSON muss exakt diese Schlüssel enthalten:
|
||
#- reason
|
||
#- klassifizierbar
|
||
#- EDSS
|
||
#- certainty_percent
|
||
#- subcategories
|
||
#
|
||
#Die subcategories müssen exakt diese 8 Schlüssel enthalten:
|
||
#- VISUAL_OPTIC_FUNCTIONS
|
||
#- BRAINSTEM_FUNCTIONS
|
||
#- PYRAMIDAL_FUNCTIONS
|
||
#- CEREBELLAR_FUNCTIONS
|
||
#- SENSORY_FUNCTIONS
|
||
#- BOWEL_AND_BLADDER_FUNCTIONS
|
||
#- CEREBRAL_FUNCTIONS
|
||
#- AMBULATION
|
||
#
|
||
#Werte:
|
||
#- klassifizierbar: true oder false
|
||
#- EDSS: Zahl von 0.0 bis 10.0 oder null
|
||
#- certainty_percent: Ganzzahl von 0 bis 100
|
||
#- Unterkategorien: Zahl oder null
|
||
#- VISUAL_OPTIC_FUNCTIONS maximal 6.0
|
||
#- BRAINSTEM_FUNCTIONS maximal 6.0
|
||
#- PYRAMIDAL_FUNCTIONS maximal 6.0
|
||
#- CEREBELLAR_FUNCTIONS maximal 6.0
|
||
#- SENSORY_FUNCTIONS maximal 6.0
|
||
#- BOWEL_AND_BLADDER_FUNCTIONS maximal 6.0
|
||
#- CEREBRAL_FUNCTIONS maximal 6.0
|
||
#- AMBULATION maximal 10.0
|
||
#- reason: maximal 250 Zeichen, Deutsch
|
||
#
|
||
#Wenn klassifizierbar false ist, setze EDSS auf null.
|
||
#
|
||
#Valide Beispielausgabe:
|
||
#{{
|
||
# "reason": "Leichte Einschränkungen mit sicher ableitbarer Gehfähigkeit und geringen funktionellen Defiziten.",
|
||
# "klassifizierbar": true,
|
||
# "EDSS": 2.0,
|
||
# "certainty_percent": 90,
|
||
# "subcategories": {{
|
||
# "VISUAL_OPTIC_FUNCTIONS": null,
|
||
# "BRAINSTEM_FUNCTIONS": null,
|
||
# "PYRAMIDAL_FUNCTIONS": 1.0,
|
||
# "CEREBELLAR_FUNCTIONS": 1.0,
|
||
# "SENSORY_FUNCTIONS": 1.0,
|
||
# "BOWEL_AND_BLADDER_FUNCTIONS": null,
|
||
# "CEREBRAL_FUNCTIONS": null,
|
||
# "AMBULATION": 0.0
|
||
# }}
|
||
#}}
|
||
#
|
||
#EDSS-Bewertungsrichtlinien:
|
||
#{EDSS_INSTRUCTIONS}
|
||
#
|
||
#Patientenbericht:
|
||
#{patient_text}
|
||
#
|
||
#Gib ausschließlich das finale JSON-Objekt zurück.
|
||
#'''
|
||
#
|
||
#
|
||
## =========================
|
||
## VALIDATION, NOT NORMALIZATION
|
||
## =========================
|
||
#
|
||
#def parse_float_preserve_raw(value):
|
||
# """
|
||
# Try to parse a value as float without clipping or correcting it.
|
||
#
|
||
# Returns:
|
||
# raw_value: original value exactly as present in parsed JSON
|
||
# numeric_value: float or None
|
||
# is_numeric: bool
|
||
# """
|
||
# raw_value = value
|
||
#
|
||
# if value is None:
|
||
# return raw_value, None, False
|
||
#
|
||
# if isinstance(value, bool):
|
||
# return raw_value, None, False
|
||
#
|
||
# try:
|
||
# numeric_value = float(str(value).replace(",", "."))
|
||
# return raw_value, numeric_value, True
|
||
# except Exception:
|
||
# return raw_value, None, False
|
||
#
|
||
#
|
||
#def is_in_range(value, min_value, max_value):
|
||
# """
|
||
# Range check without clipping.
|
||
# """
|
||
# if value is None:
|
||
# return False
|
||
# return min_value <= value <= max_value
|
||
#
|
||
#
|
||
#def validate_model_output(parsed):
|
||
# """
|
||
# Validate parsed model output without repairing/clipping clinical values.
|
||
#
|
||
# Important:
|
||
# - Does NOT clip EDSS.
|
||
# - Does NOT clip functional system values.
|
||
# - Does NOT insert default EDSS.
|
||
# - Does NOT insert default certainty_percent.
|
||
# - Missing fields are kept as None.
|
||
# - Adds explicit validity flags for scientific transparency.
|
||
# """
|
||
#
|
||
# validation = {
|
||
# "json_parse_success": isinstance(parsed, dict),
|
||
# "required_fields_present": False,
|
||
# "required_schema_success": False,
|
||
# "clinical_range_valid": False,
|
||
# "certainty_present": False,
|
||
#
|
||
# "missing_required_fields": [],
|
||
# "missing_subcategory_fields": [],
|
||
#
|
||
# "EDSS_is_numeric": False,
|
||
# "EDSS_in_valid_range": False,
|
||
# }
|
||
#
|
||
# if not isinstance(parsed, dict):
|
||
# return {
|
||
# "raw_output": parsed,
|
||
# "validated_output": {},
|
||
# "validation": validation,
|
||
# }
|
||
#
|
||
# missing_required = [
|
||
# field for field in REQUIRED_TOP_LEVEL_FIELDS
|
||
# if field not in parsed
|
||
# ]
|
||
#
|
||
# validation["missing_required_fields"] = missing_required
|
||
# validation["required_fields_present"] = len(missing_required) == 0
|
||
#
|
||
# validated = {}
|
||
#
|
||
# validated["reason"] = parsed.get("reason", None)
|
||
# validated["klassifizierbar"] = parsed.get("klassifizierbar", None)
|
||
#
|
||
# raw_certainty = parsed.get("certainty_percent", None)
|
||
# validated["raw_certainty_percent"] = raw_certainty
|
||
# validation["certainty_present"] = "certainty_percent" in parsed and raw_certainty is not None
|
||
#
|
||
# _, certainty_numeric, certainty_is_numeric = parse_float_preserve_raw(raw_certainty)
|
||
# validated["certainty_percent"] = certainty_numeric if certainty_is_numeric else None
|
||
# validated["certainty_percent_is_numeric"] = certainty_is_numeric
|
||
# validated["certainty_percent_in_valid_range"] = (
|
||
# is_in_range(certainty_numeric, 0.0, 100.0)
|
||
# if certainty_is_numeric else False
|
||
# )
|
||
#
|
||
# raw_edss = parsed.get("EDSS", None)
|
||
# raw_edss, edss_numeric, edss_is_numeric = parse_float_preserve_raw(raw_edss)
|
||
#
|
||
# validated["raw_EDSS"] = raw_edss
|
||
# validated["EDSS_numeric"] = edss_numeric
|
||
# validated["EDSS"] = edss_numeric # Backward-compatible; parsed only, not clipped
|
||
# validated["EDSS_is_numeric"] = edss_is_numeric
|
||
# validated["EDSS_in_valid_range"] = (
|
||
# is_in_range(edss_numeric, EDSS_MIN, EDSS_MAX)
|
||
# if edss_is_numeric else False
|
||
# )
|
||
#
|
||
# validation["EDSS_is_numeric"] = validated["EDSS_is_numeric"]
|
||
# validation["EDSS_in_valid_range"] = validated["EDSS_in_valid_range"]
|
||
#
|
||
# raw_subcategories = parsed.get("subcategories", None)
|
||
#
|
||
# if isinstance(raw_subcategories, dict):
|
||
# subcategories = raw_subcategories
|
||
# else:
|
||
# subcategories = {}
|
||
#
|
||
# validated["subcategories"] = {}
|
||
# validated["raw_subcategories"] = {}
|
||
# validated["subcategory_validation"] = {}
|
||
#
|
||
# missing_subcats = []
|
||
#
|
||
# for subcat, (min_value, max_value) in FUNCTIONAL_SYSTEM_RANGES.items():
|
||
# if subcat not in subcategories:
|
||
# missing_subcats.append(subcat)
|
||
#
|
||
# raw_value = subcategories.get(subcat, None)
|
||
# raw_value, numeric_value, is_numeric_value = parse_float_preserve_raw(raw_value)
|
||
# in_valid_range = (
|
||
# is_in_range(numeric_value, min_value, max_value)
|
||
# if is_numeric_value else False
|
||
# )
|
||
#
|
||
# validated["raw_subcategories"][subcat] = raw_value
|
||
# validated["subcategories"][subcat] = numeric_value
|
||
#
|
||
# validated["subcategory_validation"][subcat] = {
|
||
# "is_numeric": is_numeric_value,
|
||
# "in_valid_range": in_valid_range,
|
||
# "min_allowed": min_value,
|
||
# "max_allowed": max_value,
|
||
# }
|
||
#
|
||
# validation["missing_subcategory_fields"] = missing_subcats
|
||
#
|
||
# subcategory_schema_present = len(missing_subcats) == 0
|
||
#
|
||
# all_subcats_numeric = all(
|
||
# validated["subcategory_validation"][subcat]["is_numeric"]
|
||
# for subcat in FUNCTIONAL_SYSTEM_RANGES
|
||
# )
|
||
#
|
||
# all_subcats_in_range = all(
|
||
# validated["subcategory_validation"][subcat]["in_valid_range"]
|
||
# for subcat in FUNCTIONAL_SYSTEM_RANGES
|
||
# )
|
||
#
|
||
# validated["all_functional_systems_numeric"] = all_subcats_numeric
|
||
# validated["all_functional_systems_in_valid_range"] = all_subcats_in_range
|
||
#
|
||
# validation["clinical_range_valid"] = (
|
||
# validated["EDSS_in_valid_range"]
|
||
# and all_subcats_in_range
|
||
# )
|
||
#
|
||
# validation["required_schema_success"] = (
|
||
# validation["required_fields_present"]
|
||
# and subcategory_schema_present
|
||
# )
|
||
#
|
||
# return {
|
||
# "raw_output": parsed,
|
||
# "validated_output": validated,
|
||
# "validation": validation,
|
||
# }
|
||
#
|
||
#
|
||
## =========================
|
||
## API CALL
|
||
## =========================
|
||
#
|
||
#def make_chat_completion(model_config, prompt):
|
||
# model_name = model_config["model_name"]
|
||
#
|
||
# kwargs = dict(
|
||
# messages=[
|
||
# {
|
||
# "role": "system",
|
||
# "content": (
|
||
# "Du bist ein JSON-Generator. "
|
||
# "Antworte ausschließlich mit einem einzigen validen JSON-Objekt. "
|
||
# "Keine Erklärung. Kein Markdown. Keine Code-Fences. "
|
||
# "Keine Platzhalter. Kein Schema kopieren. "
|
||
# "Das JSON muss vollständig geschlossen sein."
|
||
# )
|
||
# },
|
||
# {
|
||
# "role": "user",
|
||
# "content": prompt
|
||
# }
|
||
# ],
|
||
# model=model_name,
|
||
# max_tokens=model_config.get("max_tokens", MAX_TOKENS),
|
||
# temperature=model_config.get("temperature", TEMPERATURE),
|
||
# )
|
||
#
|
||
# if model_config.get("use_response_format", False):
|
||
# kwargs["response_format"] = {"type": "json_object"}
|
||
#
|
||
# extra_body = model_config.get("extra_body")
|
||
# if extra_body is not None:
|
||
# kwargs["extra_body"] = extra_body
|
||
#
|
||
# return client.chat.completions.create(**kwargs)
|
||
#
|
||
#
|
||
## =========================
|
||
## INFERENCE FUNCTION WITH RETRIES
|
||
## =========================
|
||
#
|
||
#def run_inference(patient_text, model_config):
|
||
# model_name = model_config["model_name"]
|
||
# prompt = build_prompt(patient_text)
|
||
#
|
||
# process = get_process()
|
||
# sampler = ResourceSampler(interval_sec=RESOURCE_SAMPLE_INTERVAL_SEC)
|
||
#
|
||
# wall_start = time.perf_counter()
|
||
# cpu_start = get_cpu_times_sec(process)
|
||
# rss_start_mb = get_memory_rss_mb(process)
|
||
#
|
||
# sampler.start()
|
||
#
|
||
# raw_content = None
|
||
# raw_response_debug = None
|
||
# raw_parsed_output = None
|
||
# validation = None
|
||
# last_error = None
|
||
#
|
||
# prompt_tokens = None
|
||
# completion_tokens = None
|
||
# total_tokens = None
|
||
#
|
||
# try:
|
||
# for attempt in range(1, MAX_JSON_RETRIES + 2):
|
||
# try:
|
||
# response = make_chat_completion(
|
||
# model_config=model_config,
|
||
# prompt=prompt
|
||
# )
|
||
#
|
||
# message = response.choices[0].message
|
||
# raw_content = extract_message_content(message)
|
||
#
|
||
# try:
|
||
# raw_response_debug = response.model_dump()
|
||
# except Exception:
|
||
# raw_response_debug = str(response)
|
||
#
|
||
# usage = getattr(response, "usage", None)
|
||
# if usage is not None:
|
||
# prompt_tokens = getattr(usage, "prompt_tokens", None)
|
||
# completion_tokens = getattr(usage, "completion_tokens", None)
|
||
# total_tokens = getattr(usage, "total_tokens", None)
|
||
#
|
||
# parsed = extract_json_from_text(raw_content)
|
||
# validation_package = validate_model_output(parsed)
|
||
#
|
||
# success = True
|
||
# error = None
|
||
#
|
||
# result = validation_package["validated_output"]
|
||
# validation = validation_package["validation"]
|
||
# raw_parsed_output = validation_package["raw_output"]
|
||
#
|
||
# break
|
||
#
|
||
# except Exception as e:
|
||
# last_error = str(e)
|
||
#
|
||
# if attempt <= MAX_JSON_RETRIES:
|
||
# print(
|
||
# f"\n⚠️ JSON failed on attempt {attempt}. "
|
||
# f"Retrying row. Error: {last_error[:300]}"
|
||
# )
|
||
# time.sleep(RETRY_SLEEP_SEC)
|
||
# continue
|
||
#
|
||
# raise
|
||
#
|
||
# except Exception as e:
|
||
# print(f"❌ Inference error: {e}")
|
||
#
|
||
# success = False
|
||
# error = str(e)
|
||
# result = None
|
||
# raw_parsed_output = None
|
||
#
|
||
# validation = {
|
||
# "json_parse_success": False,
|
||
# "required_fields_present": False,
|
||
# "required_schema_success": False,
|
||
# "clinical_range_valid": False,
|
||
# "certainty_present": False,
|
||
# "missing_required_fields": [],
|
||
# "missing_subcategory_fields": [],
|
||
# "EDSS_is_numeric": False,
|
||
# "EDSS_in_valid_range": False,
|
||
# }
|
||
#
|
||
# finally:
|
||
# sampler.stop()
|
||
#
|
||
# wall_end = time.perf_counter()
|
||
# cpu_end = get_cpu_times_sec(process)
|
||
# rss_end_mb = get_memory_rss_mb(process)
|
||
#
|
||
# wall_time_sec = wall_end - wall_start
|
||
#
|
||
# if cpu_start is not None and cpu_end is not None:
|
||
# process_cpu_time_sec = cpu_end - cpu_start
|
||
# else:
|
||
# process_cpu_time_sec = None
|
||
#
|
||
# if rss_start_mb is not None and rss_end_mb is not None:
|
||
# rss_delta_mb = rss_end_mb - rss_start_mb
|
||
# else:
|
||
# rss_delta_mb = None
|
||
#
|
||
# return {
|
||
# "success": success,
|
||
# "error": error,
|
||
# "result": result,
|
||
#
|
||
# "validation": validation,
|
||
# "raw_parsed_output": raw_parsed_output,
|
||
#
|
||
# "model": model_name,
|
||
#
|
||
# "inference_time_sec": wall_time_sec,
|
||
#
|
||
# "process_cpu_time_sec": process_cpu_time_sec,
|
||
# "rss_before_mb": rss_start_mb,
|
||
# "rss_after_mb": rss_end_mb,
|
||
# "rss_delta_mb": rss_delta_mb,
|
||
# "peak_rss_mb": sampler.peak_rss_mb,
|
||
#
|
||
# "prompt_tokens": prompt_tokens,
|
||
# "completion_tokens": completion_tokens,
|
||
# "total_tokens": total_tokens,
|
||
#
|
||
# # Keeping raw content improves auditability but can make files large.
|
||
# # To save space, change this to: raw_content if not success else None
|
||
# "raw_content": raw_content,
|
||
# "raw_response_debug": raw_response_debug if not success else None,
|
||
# "last_error": last_error,
|
||
# }
|
||
#
|
||
#
|
||
## =========================
|
||
## BUILD PATIENT TEXT
|
||
## =========================
|
||
#
|
||
#def build_patient_text(row):
|
||
# return (
|
||
# str(row.get("T_Zusammenfassung", "")) + "\n" +
|
||
# str(row.get("Diagnosen", "")) + "\n" +
|
||
# str(row.get("T_KlinBef", "")) + "\n" +
|
||
# str(row.get("T_Befunde", ""))
|
||
# )
|
||
#
|
||
#
|
||
## =========================
|
||
## FLATTEN RESULTS FOR CSV
|
||
## =========================
|
||
#
|
||
#def flatten_result(record):
|
||
# """
|
||
# Flatten one benchmark record for CSV export.
|
||
#
|
||
# This preserves:
|
||
# - raw model values
|
||
# - parsed numeric values without clipping
|
||
# - validity flags
|
||
# - backward-compatible columns where possible
|
||
# """
|
||
#
|
||
# validation = record.get("validation") or {}
|
||
# result = record.get("result") or {}
|
||
#
|
||
# flat = {
|
||
# "model": record.get("model"),
|
||
# "iteration": record.get("iteration"),
|
||
# "row_index": record.get("row_index"),
|
||
# "row_number_in_run": record.get("row_number_in_run"),
|
||
# "unique_id": record.get("unique_id"),
|
||
# "MedDatum": record.get("MedDatum"),
|
||
#
|
||
# "success": record.get("success"),
|
||
# "error": record.get("error"),
|
||
# "last_error": record.get("last_error"),
|
||
#
|
||
# "json_parse_success": validation.get("json_parse_success"),
|
||
# "required_fields_present": validation.get("required_fields_present"),
|
||
# "required_schema_success": validation.get("required_schema_success"),
|
||
# "clinical_range_valid": validation.get("clinical_range_valid"),
|
||
# "certainty_present": validation.get("certainty_present"),
|
||
#
|
||
# "missing_required_fields": json.dumps(
|
||
# validation.get("missing_required_fields", []),
|
||
# ensure_ascii=False
|
||
# ),
|
||
# "missing_subcategory_fields": json.dumps(
|
||
# validation.get("missing_subcategory_fields", []),
|
||
# ensure_ascii=False
|
||
# ),
|
||
#
|
||
# "inference_time_sec": record.get("inference_time_sec"),
|
||
# "process_cpu_time_sec": record.get("process_cpu_time_sec"),
|
||
# "rss_before_mb": record.get("rss_before_mb"),
|
||
# "rss_after_mb": record.get("rss_after_mb"),
|
||
# "rss_delta_mb": record.get("rss_delta_mb"),
|
||
# "peak_rss_mb": record.get("peak_rss_mb"),
|
||
#
|
||
# "prompt_tokens": record.get("prompt_tokens"),
|
||
# "completion_tokens": record.get("completion_tokens"),
|
||
# "total_tokens": record.get("total_tokens"),
|
||
#
|
||
# "raw_content": record.get("raw_content"),
|
||
# "raw_parsed_output": json.dumps(record.get("raw_parsed_output"), ensure_ascii=False),
|
||
#
|
||
# # Backward-compatible fields
|
||
# "reason": result.get("reason"),
|
||
# "klassifizierbar": result.get("klassifizierbar"),
|
||
#
|
||
# "raw_certainty_percent": result.get("raw_certainty_percent"),
|
||
# "certainty_percent": result.get("certainty_percent"),
|
||
# "certainty_percent_is_numeric": result.get("certainty_percent_is_numeric"),
|
||
# "certainty_percent_in_valid_range": result.get("certainty_percent_in_valid_range"),
|
||
#
|
||
# # EDSS raw/numeric/validity fields
|
||
# "raw_EDSS": result.get("raw_EDSS"),
|
||
# "EDSS_numeric": result.get("EDSS_numeric"),
|
||
# "EDSS": result.get("EDSS"), # backward-compatible; same as EDSS_numeric, not clipped
|
||
# "EDSS_is_numeric": result.get("EDSS_is_numeric"),
|
||
# "EDSS_in_valid_range": result.get("EDSS_in_valid_range"),
|
||
#
|
||
# "all_functional_systems_numeric": result.get("all_functional_systems_numeric"),
|
||
# "all_functional_systems_in_valid_range": result.get("all_functional_systems_in_valid_range"),
|
||
# }
|
||
#
|
||
# raw_subcategories = result.get("raw_subcategories", {})
|
||
# numeric_subcategories = result.get("subcategories", {})
|
||
# subcat_validation = result.get("subcategory_validation", {})
|
||
#
|
||
# for subcat in FUNCTIONAL_SYSTEM_RANGES:
|
||
# raw_value = None
|
||
# numeric_value = None
|
||
# is_numeric = False
|
||
# in_valid_range = False
|
||
#
|
||
# if isinstance(raw_subcategories, dict):
|
||
# raw_value = raw_subcategories.get(subcat)
|
||
#
|
||
# if isinstance(numeric_subcategories, dict):
|
||
# numeric_value = numeric_subcategories.get(subcat)
|
||
#
|
||
# if isinstance(subcat_validation, dict):
|
||
# flags = subcat_validation.get(subcat, {})
|
||
# if isinstance(flags, dict):
|
||
# is_numeric = flags.get("is_numeric", False)
|
||
# in_valid_range = flags.get("in_valid_range", False)
|
||
#
|
||
# # New transparent columns
|
||
# flat[f"raw_subcat_{subcat}"] = raw_value
|
||
# flat[f"numeric_subcat_{subcat}"] = numeric_value
|
||
# flat[f"subcat_{subcat}_is_numeric"] = is_numeric
|
||
# flat[f"subcat_{subcat}_in_valid_range"] = in_valid_range
|
||
#
|
||
# # Backward-compatible old column name.
|
||
# # This is numeric but NOT clipped.
|
||
# flat[f"subcat_{subcat}"] = numeric_value
|
||
#
|
||
# return flat
|
||
#
|
||
#
|
||
## =========================
|
||
## SUMMARY STATISTICS
|
||
## =========================
|
||
#
|
||
#def summarize_records(records):
|
||
# """
|
||
# Create transparent summary statistics per model.
|
||
#
|
||
# Separates:
|
||
# - JSON/schema validity
|
||
# - numeric parse validity
|
||
# - clinical range validity
|
||
# - out-of-range outputs
|
||
# """
|
||
#
|
||
# df = pd.DataFrame([flatten_result(r) for r in records])
|
||
#
|
||
# if df.empty:
|
||
# return pd.DataFrame()
|
||
#
|
||
# def bool_mean(col):
|
||
# if col not in df.columns:
|
||
# return None
|
||
# return df[col].fillna(False).astype(bool).mean()
|
||
#
|
||
# def bool_sum(col):
|
||
# if col not in df.columns:
|
||
# return None
|
||
# return int(df[col].fillna(False).astype(bool).sum())
|
||
#
|
||
# n_records = len(df)
|
||
#
|
||
# summary = {
|
||
# "model": df["model"].iloc[0] if "model" in df.columns else None,
|
||
# "n_total_responses": n_records,
|
||
#
|
||
# "n_success": bool_sum("success"),
|
||
# "success_rate": bool_mean("success"),
|
||
#
|
||
# "n_json_parse_success": bool_sum("json_parse_success"),
|
||
# "json_parse_success_rate": bool_mean("json_parse_success"),
|
||
#
|
||
# "n_required_fields_present": bool_sum("required_fields_present"),
|
||
# "required_fields_present_rate": bool_mean("required_fields_present"),
|
||
#
|
||
# "n_required_schema_success": bool_sum("required_schema_success"),
|
||
# "required_schema_success_rate": bool_mean("required_schema_success"),
|
||
#
|
||
# "n_clinical_range_valid": bool_sum("clinical_range_valid"),
|
||
# "clinical_range_valid_rate": bool_mean("clinical_range_valid"),
|
||
#
|
||
# "n_certainty_present": bool_sum("certainty_present"),
|
||
# "certainty_present_rate": bool_mean("certainty_present"),
|
||
#
|
||
# "n_EDSS_numeric": bool_sum("EDSS_is_numeric"),
|
||
# "EDSS_numeric_rate": bool_mean("EDSS_is_numeric"),
|
||
#
|
||
# "n_EDSS_in_valid_range": bool_sum("EDSS_in_valid_range"),
|
||
# "EDSS_valid_range_rate": bool_mean("EDSS_in_valid_range"),
|
||
# }
|
||
#
|
||
# # EDSS out-of-range among numeric EDSS outputs
|
||
# if "EDSS_is_numeric" in df.columns and "EDSS_in_valid_range" in df.columns:
|
||
# edss_numeric = df["EDSS_is_numeric"].fillna(False).astype(bool)
|
||
# edss_valid = df["EDSS_in_valid_range"].fillna(False).astype(bool)
|
||
# edss_out_of_range = edss_numeric & (~edss_valid)
|
||
#
|
||
# summary["n_EDSS_out_of_range"] = int(edss_out_of_range.sum())
|
||
# summary["EDSS_out_of_range_rate_total"] = float(edss_out_of_range.mean())
|
||
# summary["EDSS_out_of_range_rate_among_numeric"] = (
|
||
# float(edss_out_of_range.sum() / edss_numeric.sum())
|
||
# if edss_numeric.sum() > 0 else None
|
||
# )
|
||
#
|
||
# # Functional system rates
|
||
# fs_out_of_range_any = pd.Series(False, index=df.index)
|
||
# fs_valid_all = pd.Series(True, index=df.index)
|
||
#
|
||
# for subcat in FUNCTIONAL_SYSTEM_RANGES:
|
||
# numeric_col = f"subcat_{subcat}_is_numeric"
|
||
# valid_col = f"subcat_{subcat}_in_valid_range"
|
||
#
|
||
# if numeric_col in df.columns:
|
||
# numeric_series = df[numeric_col].fillna(False).astype(bool)
|
||
# else:
|
||
# numeric_series = pd.Series(False, index=df.index)
|
||
#
|
||
# if valid_col in df.columns:
|
||
# valid_series = df[valid_col].fillna(False).astype(bool)
|
||
# else:
|
||
# valid_series = pd.Series(False, index=df.index)
|
||
#
|
||
# out_of_range_series = numeric_series & (~valid_series)
|
||
#
|
||
# summary[f"n_{subcat}_numeric"] = int(numeric_series.sum())
|
||
# summary[f"{subcat}_numeric_rate"] = float(numeric_series.mean())
|
||
#
|
||
# summary[f"n_{subcat}_in_valid_range"] = int(valid_series.sum())
|
||
# summary[f"{subcat}_valid_range_rate"] = float(valid_series.mean())
|
||
#
|
||
# summary[f"n_{subcat}_out_of_range"] = int(out_of_range_series.sum())
|
||
# summary[f"{subcat}_out_of_range_rate_total"] = float(out_of_range_series.mean())
|
||
# summary[f"{subcat}_out_of_range_rate_among_numeric"] = (
|
||
# float(out_of_range_series.sum() / numeric_series.sum())
|
||
# if numeric_series.sum() > 0 else None
|
||
# )
|
||
#
|
||
# fs_out_of_range_any = fs_out_of_range_any | out_of_range_series
|
||
# fs_valid_all = fs_valid_all & valid_series
|
||
#
|
||
# summary["n_any_functional_system_out_of_range"] = int(fs_out_of_range_any.sum())
|
||
# summary["any_functional_system_out_of_range_rate_total"] = float(fs_out_of_range_any.mean())
|
||
#
|
||
# summary["n_all_functional_systems_in_valid_range"] = int(fs_valid_all.sum())
|
||
# summary["all_functional_systems_valid_range_rate"] = float(fs_valid_all.mean())
|
||
#
|
||
# numeric_cols = [
|
||
# "inference_time_sec",
|
||
# "process_cpu_time_sec",
|
||
# "rss_delta_mb",
|
||
# "peak_rss_mb",
|
||
# "prompt_tokens",
|
||
# "completion_tokens",
|
||
# "total_tokens",
|
||
# "certainty_percent",
|
||
# "EDSS_numeric",
|
||
# ]
|
||
#
|
||
# for col in numeric_cols:
|
||
# if col in df.columns:
|
||
# values = pd.to_numeric(df[col], errors="coerce")
|
||
# summary[f"{col}_mean"] = values.mean()
|
||
# summary[f"{col}_median"] = values.median()
|
||
# summary[f"{col}_std"] = values.std()
|
||
# summary[f"{col}_min"] = values.min()
|
||
# summary[f"{col}_max"] = values.max()
|
||
#
|
||
# if "EDSS_is_numeric" in df.columns and "EDSS_in_valid_range" in df.columns:
|
||
# primary_valid_only = (
|
||
# df["EDSS_is_numeric"].fillna(False).astype(bool)
|
||
# & df["EDSS_in_valid_range"].fillna(False).astype(bool)
|
||
# )
|
||
#
|
||
# sensitivity_all_numeric = df["EDSS_is_numeric"].fillna(False).astype(bool)
|
||
#
|
||
# summary["n_primary_valid_only_EDSS"] = int(primary_valid_only.sum())
|
||
# summary["primary_valid_only_EDSS_rate"] = float(primary_valid_only.mean())
|
||
#
|
||
# summary["n_sensitivity_all_numeric_EDSS"] = int(sensitivity_all_numeric.sum())
|
||
# summary["sensitivity_all_numeric_EDSS_rate"] = float(sensitivity_all_numeric.mean())
|
||
#
|
||
# return pd.DataFrame([summary])
|
||
#
|
||
#
|
||
## =========================
|
||
## ANALYSIS DATASET HELPERS
|
||
## =========================
|
||
#
|
||
#def create_analysis_datasets(records):
|
||
# """
|
||
# Create two transparent EDSS analysis datasets:
|
||
#
|
||
# 1. primary_valid_only:
|
||
# Only numeric EDSS predictions within the valid clinical range.
|
||
#
|
||
# 2. sensitivity_all_numeric:
|
||
# All numeric EDSS predictions, including out-of-range values.
|
||
# No clipping is applied.
|
||
# """
|
||
#
|
||
# df = pd.DataFrame([flatten_result(r) for r in records])
|
||
#
|
||
# if df.empty:
|
||
# return df.copy(), df.copy()
|
||
#
|
||
# primary_valid_only = df[
|
||
# df["EDSS_is_numeric"].fillna(False).astype(bool)
|
||
# & df["EDSS_in_valid_range"].fillna(False).astype(bool)
|
||
# ].copy()
|
||
#
|
||
# sensitivity_all_numeric = df[
|
||
# df["EDSS_is_numeric"].fillna(False).astype(bool)
|
||
# ].copy()
|
||
#
|
||
# return primary_valid_only, sensitivity_all_numeric
|
||
#
|
||
#
|
||
## =========================
|
||
## INCREMENTAL SAVE HELPERS
|
||
## =========================
|
||
#
|
||
#def append_jsonl(path, record):
|
||
# with open(path, "a", encoding="utf-8") as f:
|
||
# f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
||
# f.flush()
|
||
# os.fsync(f.fileno())
|
||
#
|
||
#
|
||
#def append_csv(path, record):
|
||
# flat = flatten_result(record)
|
||
# df_one = pd.DataFrame([flat])
|
||
# file_exists = Path(path).exists()
|
||
# df_one.to_csv(path, mode="a", header=not file_exists, index=False)
|
||
#
|
||
#
|
||
## =========================
|
||
## MAIN LOOP
|
||
## =========================
|
||
#
|
||
#if __name__ == "__main__":
|
||
#
|
||
# run_timestamp = now_timestamp()
|
||
#
|
||
# results_root = Path(RESULTS_ROOT)
|
||
# results_root.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# run_root = results_root / f"run_{run_timestamp}"
|
||
# run_root.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# print(f"Results root: {run_root}")
|
||
#
|
||
# df = pd.read_csv(INPUT_CSV, sep=";")
|
||
#
|
||
# if MAX_ROWS is not None:
|
||
# df = df.head(MAX_ROWS)
|
||
#
|
||
# total_rows = len(df)
|
||
#
|
||
# model_names_for_print = [m["model_name"] for m in MODEL_CONFIGS]
|
||
#
|
||
# print(f"Loaded {total_rows} patient records.")
|
||
# print(f"Models: {model_names_for_print}")
|
||
# print(f"Iterations per model: {NUM_ITERATIONS}")
|
||
#
|
||
# all_model_summaries = []
|
||
#
|
||
# for model_config in MODEL_CONFIGS:
|
||
# model_name = model_config["model_name"]
|
||
# safe_model = safe_dir_name(model_name)
|
||
#
|
||
# model_dir = run_root / safe_model
|
||
# model_dir.mkdir(parents=True, exist_ok=True)
|
||
#
|
||
# print(f"\n{'#' * 80}")
|
||
# print(f"MODEL: {model_name}")
|
||
# print(f"use_response_format: {model_config.get('use_response_format', False)}")
|
||
# print(f"temperature: {model_config.get('temperature', TEMPERATURE)}")
|
||
# print(f"max_tokens: {model_config.get('max_tokens', MAX_TOKENS)}")
|
||
# print(f"Saving to: {model_dir}")
|
||
# print(f"{'#' * 80}")
|
||
#
|
||
# model_records = []
|
||
# model_start = time.perf_counter()
|
||
#
|
||
# for iteration in range(1, NUM_ITERATIONS + 1):
|
||
# print(f"\n{'=' * 60}")
|
||
# print(f"🔄 MODEL {model_name} | ITERATION {iteration}/{NUM_ITERATIONS}")
|
||
# print(f"{'=' * 60}")
|
||
#
|
||
# iteration_results = []
|
||
# iteration_start = time.perf_counter()
|
||
#
|
||
# incremental_jsonl_path = model_dir / f"{safe_model}_iter_{iteration}_{run_timestamp}_incremental.jsonl"
|
||
# incremental_csv_path = model_dir / f"{safe_model}_iter_{iteration}_{run_timestamp}_incremental.csv"
|
||
#
|
||
# print(f"Incremental JSONL: {incremental_jsonl_path}")
|
||
# print(f"Incremental CSV: {incremental_csv_path}")
|
||
#
|
||
# for loop_i, (idx, row) in enumerate(df.iterrows(), start=1):
|
||
# print(
|
||
# f"\rModel={model_name} | Row {loop_i}/{total_rows} | Iter {iteration}",
|
||
# end="",
|
||
# flush=True
|
||
# )
|
||
#
|
||
# try:
|
||
# patient_text = build_patient_text(row)
|
||
#
|
||
# record = run_inference(
|
||
# patient_text=patient_text,
|
||
# model_config=model_config
|
||
# )
|
||
#
|
||
# record["iteration"] = iteration
|
||
# record["row_index"] = int(idx)
|
||
# record["row_number_in_run"] = int(loop_i)
|
||
# record["unique_id"] = row.get("unique_id", f"row_{idx}")
|
||
# record["MedDatum"] = row.get("MedDatum", None)
|
||
#
|
||
# iteration_results.append(record)
|
||
# model_records.append(record)
|
||
#
|
||
# if loop_i % SAVE_EVERY_N_ROWS == 0:
|
||
# append_jsonl(incremental_jsonl_path, record)
|
||
# append_csv(incremental_csv_path, record)
|
||
#
|
||
# if record["success"]:
|
||
# res = record["result"] or {}
|
||
# edss_display = res.get("EDSS_numeric", None)
|
||
# edss_valid = res.get("EDSS_in_valid_range", False)
|
||
#
|
||
# print(
|
||
# f" ✅ EDSS={edss_display}, "
|
||
# f"valid_range={edss_valid}, "
|
||
# f"time={record['inference_time_sec']:.2f}s"
|
||
# )
|
||
# else:
|
||
# print(f" ❌ {record.get('error', 'Unknown error')}")
|
||
#
|
||
# except Exception as e:
|
||
# print(f"\n⚠️ Row {idx} failed outside inference wrapper: {e}")
|
||
#
|
||
# fallback_record = {
|
||
# "success": False,
|
||
# "error": str(e),
|
||
# "last_error": str(e),
|
||
# "result": None,
|
||
#
|
||
# "validation": {
|
||
# "json_parse_success": False,
|
||
# "required_fields_present": False,
|
||
# "required_schema_success": False,
|
||
# "clinical_range_valid": False,
|
||
# "certainty_present": False,
|
||
# "missing_required_fields": [],
|
||
# "missing_subcategory_fields": [],
|
||
# "EDSS_is_numeric": False,
|
||
# "EDSS_in_valid_range": False,
|
||
# },
|
||
# "raw_parsed_output": None,
|
||
#
|
||
# "model": model_name,
|
||
# "iteration": iteration,
|
||
# "row_index": int(idx),
|
||
# "row_number_in_run": int(loop_i),
|
||
# "unique_id": row.get("unique_id", f"row_{idx}"),
|
||
# "MedDatum": row.get("MedDatum", None),
|
||
#
|
||
# "inference_time_sec": None,
|
||
# "process_cpu_time_sec": None,
|
||
# "rss_before_mb": None,
|
||
# "rss_after_mb": None,
|
||
# "rss_delta_mb": None,
|
||
# "peak_rss_mb": None,
|
||
#
|
||
# "prompt_tokens": None,
|
||
# "completion_tokens": None,
|
||
# "total_tokens": None,
|
||
#
|
||
# "raw_content": None,
|
||
# "raw_response_debug": None,
|
||
# }
|
||
#
|
||
# iteration_results.append(fallback_record)
|
||
# model_records.append(fallback_record)
|
||
#
|
||
# append_jsonl(incremental_jsonl_path, fallback_record)
|
||
# append_csv(incremental_csv_path, fallback_record)
|
||
#
|
||
# if STOP_ON_FIRST_ERROR:
|
||
# break
|
||
#
|
||
# iteration_elapsed = time.perf_counter() - iteration_start
|
||
#
|
||
# # Final full per-iteration JSON
|
||
# iter_json_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}.json"
|
||
# with open(iter_json_path, "w", encoding="utf-8") as f:
|
||
# json.dump(iteration_results, f, indent=2, ensure_ascii=False)
|
||
#
|
||
# # Final full per-iteration CSV
|
||
# iter_csv_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}.csv"
|
||
# iter_flat_df = pd.DataFrame([flatten_result(r) for r in iteration_results])
|
||
# iter_flat_df.to_csv(iter_csv_path, index=False)
|
||
#
|
||
# # Transparent analysis datasets
|
||
# primary_valid_only_df, sensitivity_all_numeric_df = create_analysis_datasets(iteration_results)
|
||
#
|
||
# primary_valid_only_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}_primary_valid_only.csv"
|
||
# sensitivity_all_numeric_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}_sensitivity_all_numeric.csv"
|
||
#
|
||
# primary_valid_only_df.to_csv(primary_valid_only_path, index=False)
|
||
# sensitivity_all_numeric_df.to_csv(sensitivity_all_numeric_path, index=False)
|
||
#
|
||
# print(f"\n✅ Iteration {iteration} complete.")
|
||
# print(f"Incremental JSONL saved to: {incremental_jsonl_path}")
|
||
# print(f"Incremental CSV saved to: {incremental_csv_path}")
|
||
# print(f"Final JSON saved to: {iter_json_path}")
|
||
# print(f"Final CSV saved to: {iter_csv_path}")
|
||
# print(f"Primary valid-only CSV saved to: {primary_valid_only_path}")
|
||
# print(f"Sensitivity all-numeric CSV: {sensitivity_all_numeric_path}")
|
||
# print(
|
||
# f"⏱️ Iteration time: {iteration_elapsed:.1f}s "
|
||
# f"({iteration_elapsed / max(total_rows, 1):.2f}s/row)"
|
||
# )
|
||
#
|
||
# model_elapsed = time.perf_counter() - model_start
|
||
#
|
||
# # Save all records for this model
|
||
# model_json_path = model_dir / f"{safe_model}_all_results_{run_timestamp}.json"
|
||
# with open(model_json_path, "w", encoding="utf-8") as f:
|
||
# json.dump(model_records, f, indent=2, ensure_ascii=False)
|
||
#
|
||
# model_csv_path = model_dir / f"{safe_model}_all_results_{run_timestamp}.csv"
|
||
# model_flat_df = pd.DataFrame([flatten_result(r) for r in model_records])
|
||
# model_flat_df.to_csv(model_csv_path, index=False)
|
||
#
|
||
# # Save model-level analysis datasets
|
||
# primary_valid_only_df, sensitivity_all_numeric_df = create_analysis_datasets(model_records)
|
||
#
|
||
# model_primary_valid_only_path = model_dir / f"{safe_model}_all_results_{run_timestamp}_primary_valid_only.csv"
|
||
# model_sensitivity_all_numeric_path = model_dir / f"{safe_model}_all_results_{run_timestamp}_sensitivity_all_numeric.csv"
|
||
#
|
||
# primary_valid_only_df.to_csv(model_primary_valid_only_path, index=False)
|
||
# sensitivity_all_numeric_df.to_csv(model_sensitivity_all_numeric_path, index=False)
|
||
#
|
||
# # Save model summary
|
||
# model_summary_df = summarize_records(model_records)
|
||
# model_summary_df["model_total_wall_time_sec"] = model_elapsed
|
||
# model_summary_df["model_total_wall_time_min"] = model_elapsed / 60
|
||
#
|
||
# model_summary_path = model_dir / f"{safe_model}_summary_{run_timestamp}.csv"
|
||
# model_summary_df.to_csv(model_summary_path, index=False)
|
||
#
|
||
# all_model_summaries.append(model_summary_df)
|
||
#
|
||
# print(f"\n🎉 Model completed: {model_name}")
|
||
# print(f"All JSON: {model_json_path}")
|
||
# print(f"All CSV: {model_csv_path}")
|
||
# print(f"All primary valid-only CSV: {model_primary_valid_only_path}")
|
||
# print(f"All sensitivity all-numeric CSV: {model_sensitivity_all_numeric_path}")
|
||
# print(f"Summary: {model_summary_path}")
|
||
# print(f"Total model time: {model_elapsed / 60:.2f} min")
|
||
#
|
||
# if all_model_summaries:
|
||
# combined_summary_df = pd.concat(all_model_summaries, ignore_index=True)
|
||
# combined_summary_path = run_root / f"all_models_summary_{run_timestamp}.csv"
|
||
# combined_summary_df.to_csv(combined_summary_path, index=False)
|
||
#
|
||
# print(f"\n📊 Combined summary saved to: {combined_summary_path}")
|
||
#
|
||
# print(f"\n🎉 All models and all iterations completed!")
|
||
##
|
||
|
||
|
||
|
||
|
||
# %% Minimal parallel EDSS benchmark with correct klassifizierbar/EDSS logic
|
||
|
||
import os
|
||
import re
|
||
import json
|
||
import time
|
||
from pathlib import Path
|
||
from datetime import datetime
|
||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||
|
||
import pandas as pd
|
||
from openai import OpenAI
|
||
from dotenv import load_dotenv
|
||
|
||
|
||
# =========================
|
||
# CONFIGURATION
|
||
# =========================
|
||
|
||
load_dotenv()
|
||
|
||
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
||
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
|
||
|
||
INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
|
||
EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/prompts/Komplett.txt"
|
||
RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results/benchmark_runs"
|
||
|
||
MODEL_CONFIGS = [
|
||
{
|
||
"model_name": "gpt-oss-120b",
|
||
"use_response_format": True,
|
||
"temperature": 0.0,
|
||
"max_tokens": 4096,
|
||
"extra_body": None,
|
||
},
|
||
{
|
||
"model_name": "qwen3.6-27b",
|
||
"use_response_format": False,
|
||
"temperature": 0.0,
|
||
"max_tokens": 4096,
|
||
"extra_body": {
|
||
"chat_template_kwargs": {
|
||
"enable_thinking": False
|
||
}
|
||
},
|
||
},
|
||
{
|
||
"model_name": "gemma-4-31B-it",
|
||
"use_response_format": False,
|
||
"temperature": 0.0,
|
||
"max_tokens": 4096,
|
||
"extra_body": None,
|
||
},
|
||
]
|
||
|
||
NUM_ITERATIONS = 10
|
||
|
||
MAX_ROWS = None
|
||
# MAX_ROWS = 2
|
||
|
||
PARALLEL_WORKERS = 10
|
||
BATCH_SIZE = 100
|
||
|
||
MAX_JSON_RETRIES = 5
|
||
RETRY_SLEEP_SEC = 2
|
||
|
||
STOP_ON_FIRST_ERROR = False
|
||
|
||
|
||
# =========================
|
||
# CONSTANTS
|
||
# =========================
|
||
|
||
EDSS_MIN = 0.0
|
||
EDSS_MAX = 10.0
|
||
|
||
FUNCTIONAL_SYSTEM_RANGES = {
|
||
"VISUAL_OPTIC_FUNCTIONS": (0.0, 6.0),
|
||
"BRAINSTEM_FUNCTIONS": (0.0, 6.0),
|
||
"PYRAMIDAL_FUNCTIONS": (0.0, 6.0),
|
||
"CEREBELLAR_FUNCTIONS": (0.0, 6.0),
|
||
"SENSORY_FUNCTIONS": (0.0, 6.0),
|
||
"BOWEL_AND_BLADDER_FUNCTIONS": (0.0, 6.0),
|
||
"CEREBRAL_FUNCTIONS": (0.0, 6.0),
|
||
"AMBULATION": (0.0, 10.0),
|
||
}
|
||
|
||
REQUIRED_TOP_LEVEL_FIELDS = [
|
||
"reason",
|
||
"klassifizierbar",
|
||
"EDSS",
|
||
"certainty_percent",
|
||
"subcategories",
|
||
]
|
||
|
||
|
||
# =========================
|
||
# CLIENT
|
||
# =========================
|
||
|
||
client = OpenAI(
|
||
api_key=OPENAI_API_KEY,
|
||
base_url=OPENAI_BASE_URL,
|
||
)
|
||
|
||
|
||
# =========================
|
||
# HELPERS
|
||
# =========================
|
||
|
||
def safe_dir_name(name):
|
||
name = str(name).strip()
|
||
name = re.sub(r"[^\w\-.]+", "_", name)
|
||
return name[:150]
|
||
|
||
|
||
def now_timestamp():
|
||
return datetime.now().strftime("%Y%m%d_%H%M%S")
|
||
|
||
|
||
def parse_float(value):
|
||
"""
|
||
Returns:
|
||
- raw value
|
||
- numeric float value or None
|
||
- is_numeric boolean
|
||
"""
|
||
if value is None or isinstance(value, bool):
|
||
return value, None, False
|
||
|
||
try:
|
||
return value, float(str(value).replace(",", ".")), True
|
||
except Exception:
|
||
return value, None, False
|
||
def is_in_range(value, min_value, max_value):
|
||
if value is None:
|
||
return False
|
||
return min_value <= value <= max_value
|
||
|
||
|
||
def build_patient_text(row):
|
||
return (
|
||
str(row.get("T_Zusammenfassung", "")) + "\n" +
|
||
str(row.get("Diagnosen", "")) + "\n" +
|
||
str(row.get("T_KlinBef", "")) + "\n" +
|
||
str(row.get("T_Befunde", ""))
|
||
)
|
||
|
||
|
||
# =========================
|
||
# READ EDSS INSTRUCTIONS
|
||
# =========================
|
||
|
||
with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
|
||
EDSS_INSTRUCTIONS = f.read().strip()
|
||
|
||
|
||
# =========================
|
||
# PROMPT
|
||
# =========================
|
||
|
||
def build_prompt(patient_text):
|
||
return f"""Du bist ein medizinischer Assistent für EDSS-Extraktion aus klinischen Berichten.
|
||
|
||
Extrahiere:
|
||
1. Ob der Bericht für eine EDSS-Einschätzung klassifizierbar ist.
|
||
2. Falls klassifizierbar: Gesamt-EDSS-Score von 0.0 bis 10.0.
|
||
3. Alle 8 EDSS-Unterkategorien, soweit ableitbar.
|
||
4. Sicherheit als Ganzzahl von 0 bis 100.
|
||
|
||
Antworte ausschließlich mit EINEM validen JSON-Objekt.
|
||
Kein Markdown. Keine Code-Fences. Kein Text vor oder nach JSON.
|
||
Keine Platzhalter. Kopiere kein Schema.
|
||
|
||
Das JSON muss exakt diese Schlüssel enthalten:
|
||
- reason
|
||
- klassifizierbar
|
||
- EDSS
|
||
- certainty_percent
|
||
- subcategories
|
||
|
||
Die subcategories müssen exakt diese 8 Schlüssel enthalten:
|
||
- VISUAL_OPTIC_FUNCTIONS
|
||
- BRAINSTEM_FUNCTIONS
|
||
- PYRAMIDAL_FUNCTIONS
|
||
- CEREBELLAR_FUNCTIONS
|
||
- SENSORY_FUNCTIONS
|
||
- BOWEL_AND_BLADDER_FUNCTIONS
|
||
- CEREBRAL_FUNCTIONS
|
||
- AMBULATION
|
||
|
||
Wichtige Regeln:
|
||
- klassifizierbar muss true oder false sein.
|
||
- Wenn klassifizierbar=true:
|
||
- EDSS muss eine Zahl zwischen 0.0 und 10.0 sein.
|
||
- EDSS darf dann niemals null sein.
|
||
- Wenn klassifizierbar=false:
|
||
- EDSS muss null sein.
|
||
- Verwende klassifizierbar=false nur, wenn keine vernünftige EDSS-Einschätzung möglich ist.
|
||
- Setze klassifizierbar=true, wenn irgendeine plausible EDSS-Einschätzung aus neurologischem Befund, Gehfähigkeit, Funktionssystemen, Diagnoseverlauf oder explizitem EDSS-Wert ableitbar ist.
|
||
- Fehlende einzelne Unterkategorien sind kein Grund für klassifizierbar=false.
|
||
- Einzelne Unterkategorien dürfen null sein, wenn sie nicht ausreichend ableitbar sind.
|
||
- certainty_percent muss eine Ganzzahl von 0 bis 100 sein.
|
||
- reason: maximal 250 Zeichen, Deutsch.
|
||
|
||
Valide Beispielausgabe bei klassifizierbarem Bericht:
|
||
{{
|
||
"reason": "Leichte Einschränkungen mit sicher ableitbarer Gehfähigkeit.",
|
||
"klassifizierbar": true,
|
||
"EDSS": 2.0,
|
||
"certainty_percent": 90,
|
||
"subcategories": {{
|
||
"VISUAL_OPTIC_FUNCTIONS": null,
|
||
"BRAINSTEM_FUNCTIONS": null,
|
||
"PYRAMIDAL_FUNCTIONS": 1.0,
|
||
"CEREBELLAR_FUNCTIONS": 1.0,
|
||
"SENSORY_FUNCTIONS": 1.0,
|
||
"BOWEL_AND_BLADDER_FUNCTIONS": null,
|
||
"CEREBRAL_FUNCTIONS": null,
|
||
"AMBULATION": 0.0
|
||
}}
|
||
}}
|
||
|
||
Valide Beispielausgabe bei nicht klassifizierbarem Bericht:
|
||
{{
|
||
"reason": "Keine ausreichenden neurologischen Informationen für eine EDSS-Einschätzung.",
|
||
"klassifizierbar": false,
|
||
"EDSS": null,
|
||
"certainty_percent": 0,
|
||
"subcategories": {{
|
||
"VISUAL_OPTIC_FUNCTIONS": null,
|
||
"BRAINSTEM_FUNCTIONS": null,
|
||
"PYRAMIDAL_FUNCTIONS": null,
|
||
"CEREBELLAR_FUNCTIONS": null,
|
||
"SENSORY_FUNCTIONS": null,
|
||
"BOWEL_AND_BLADDER_FUNCTIONS": null,
|
||
"CEREBRAL_FUNCTIONS": null,
|
||
"AMBULATION": null
|
||
}}
|
||
}}
|
||
|
||
EDSS-Bewertungsrichtlinien:
|
||
{EDSS_INSTRUCTIONS}
|
||
|
||
Patientenbericht:
|
||
{patient_text}
|
||
|
||
Gib ausschließlich das finale JSON-Objekt zurück.
|
||
"""
|
||
|
||
|
||
# =========================
|
||
# JSON EXTRACTION
|
||
# =========================
|
||
|
||
def extract_json_from_text(text):
|
||
if text is None:
|
||
raise ValueError("Model returned empty content")
|
||
|
||
text = str(text).strip()
|
||
|
||
if not text:
|
||
raise ValueError("Model returned empty content")
|
||
|
||
text = (
|
||
text.replace("```json", "")
|
||
.replace("```JSON", "")
|
||
.replace("```Json", "")
|
||
.replace("```", "")
|
||
.strip()
|
||
)
|
||
|
||
try:
|
||
parsed = json.loads(text)
|
||
if isinstance(parsed, dict):
|
||
return parsed
|
||
except json.JSONDecodeError:
|
||
pass
|
||
|
||
candidates = []
|
||
stack = []
|
||
start_idx = None
|
||
in_string = False
|
||
escape = False
|
||
|
||
for i, ch in enumerate(text):
|
||
if escape:
|
||
escape = False
|
||
continue
|
||
|
||
if ch == "\\":
|
||
escape = True
|
||
continue
|
||
|
||
if ch == '"':
|
||
in_string = not in_string
|
||
continue
|
||
|
||
if in_string:
|
||
continue
|
||
|
||
if ch == "{":
|
||
if not stack:
|
||
start_idx = i
|
||
stack.append(ch)
|
||
|
||
elif ch == "}":
|
||
if stack:
|
||
stack.pop()
|
||
if not stack and start_idx is not None:
|
||
candidates.append(text[start_idx:i + 1])
|
||
start_idx = None
|
||
|
||
for candidate in reversed(candidates):
|
||
try:
|
||
parsed = json.loads(candidate)
|
||
if isinstance(parsed, dict):
|
||
return parsed
|
||
except json.JSONDecodeError:
|
||
continue
|
||
|
||
raise ValueError(f"No valid JSON object found. Raw starts with: {text[:500]}")
|
||
|
||
|
||
def extract_message_content(message):
|
||
content = getattr(message, "content", None)
|
||
|
||
if content is not None:
|
||
return content
|
||
|
||
try:
|
||
msg = message.model_dump()
|
||
except Exception:
|
||
return None
|
||
|
||
for key in ["content", "reasoning_content", "reasoning", "text", "output_text"]:
|
||
if msg.get(key):
|
||
return msg[key]
|
||
|
||
return None
|
||
|
||
|
||
# =========================
|
||
# VALIDATION WITHOUT CLIPPING
|
||
# =========================
|
||
|
||
def validate_model_output(parsed):
|
||
missing_required = [
|
||
field for field in REQUIRED_TOP_LEVEL_FIELDS
|
||
if field not in parsed
|
||
]
|
||
|
||
required_fields_present = len(missing_required) == 0
|
||
|
||
klassifizierbar = parsed.get("klassifizierbar", None)
|
||
klassifizierbar_is_bool = isinstance(klassifizierbar, bool)
|
||
|
||
raw_certainty = parsed.get("certainty_percent")
|
||
_, certainty_numeric, certainty_is_numeric = parse_float(raw_certainty)
|
||
|
||
raw_edss = parsed.get("EDSS")
|
||
raw_edss, edss_numeric, edss_is_numeric = parse_float(raw_edss)
|
||
|
||
edss_in_valid_range = (
|
||
is_in_range(edss_numeric, EDSS_MIN, EDSS_MAX)
|
||
if edss_is_numeric else False
|
||
)
|
||
|
||
if klassifizierbar is True:
|
||
edss_logic_valid = edss_is_numeric and edss_in_valid_range
|
||
elif klassifizierbar is False:
|
||
edss_logic_valid = raw_edss is None
|
||
else:
|
||
edss_logic_valid = False
|
||
|
||
raw_subcats = parsed.get("subcategories")
|
||
if not isinstance(raw_subcats, dict):
|
||
raw_subcats = {}
|
||
|
||
subcats_numeric = {}
|
||
raw_subcats_out = {}
|
||
subcat_validation = {}
|
||
missing_subcats = []
|
||
|
||
for name, (min_value, max_value) in FUNCTIONAL_SYSTEM_RANGES.items():
|
||
if name not in raw_subcats:
|
||
missing_subcats.append(name)
|
||
|
||
raw_value = raw_subcats.get(name)
|
||
raw_value, numeric_value, is_numeric = parse_float(raw_value)
|
||
|
||
if raw_value is None:
|
||
in_valid_range = True
|
||
valid_when_present = True
|
||
else:
|
||
in_valid_range = (
|
||
is_in_range(numeric_value, min_value, max_value)
|
||
if is_numeric else False
|
||
)
|
||
valid_when_present = is_numeric and in_valid_range
|
||
|
||
raw_subcats_out[name] = raw_value
|
||
subcats_numeric[name] = numeric_value
|
||
subcat_validation[name] = {
|
||
"is_numeric": is_numeric,
|
||
"in_valid_range": in_valid_range,
|
||
"valid_when_present": valid_when_present,
|
||
"is_missing_or_null": raw_value is None,
|
||
}
|
||
|
||
all_subcats_valid_when_present = all(
|
||
subcat_validation[name]["valid_when_present"]
|
||
or subcat_validation[name]["is_missing_or_null"]
|
||
for name in FUNCTIONAL_SYSTEM_RANGES
|
||
)
|
||
|
||
required_schema_success = (
|
||
required_fields_present
|
||
and len(missing_subcats) == 0
|
||
and klassifizierbar_is_bool
|
||
)
|
||
|
||
clinical_output_valid = (
|
||
required_schema_success
|
||
and edss_logic_valid
|
||
and all_subcats_valid_when_present
|
||
)
|
||
|
||
return {
|
||
"reason": parsed.get("reason"),
|
||
"klassifizierbar": klassifizierbar,
|
||
"klassifizierbar_is_bool": klassifizierbar_is_bool,
|
||
|
||
"raw_certainty_percent": raw_certainty,
|
||
"certainty_percent": certainty_numeric if certainty_is_numeric else None,
|
||
"certainty_present": raw_certainty is not None,
|
||
"certainty_percent_is_numeric": certainty_is_numeric,
|
||
"certainty_percent_in_valid_range": (
|
||
is_in_range(certainty_numeric, 0.0, 100.0)
|
||
if certainty_is_numeric else False
|
||
),
|
||
|
||
"raw_EDSS": raw_edss,
|
||
"EDSS_numeric": edss_numeric,
|
||
"EDSS": edss_numeric,
|
||
"EDSS_is_numeric": edss_is_numeric,
|
||
"EDSS_in_valid_range": edss_in_valid_range,
|
||
"edss_logic_valid": edss_logic_valid,
|
||
|
||
"json_parse_success": True,
|
||
"required_fields_present": required_fields_present,
|
||
"required_schema_success": required_schema_success,
|
||
|
||
"clinical_range_valid": clinical_output_valid,
|
||
"clinical_output_valid": clinical_output_valid,
|
||
|
||
"missing_required_fields": missing_required,
|
||
"missing_subcategory_fields": missing_subcats,
|
||
|
||
"subcategories": subcats_numeric,
|
||
"raw_subcategories": raw_subcats_out,
|
||
"subcategory_validation": subcat_validation,
|
||
|
||
"all_functional_systems_valid_when_present": all_subcats_valid_when_present,
|
||
}
|
||
|
||
|
||
# =========================
|
||
# API CALL
|
||
# =========================
|
||
|
||
def make_chat_completion(model_config, prompt):
|
||
kwargs = {
|
||
"messages": [
|
||
{
|
||
"role": "system",
|
||
"content": (
|
||
"Du bist ein JSON-Generator. "
|
||
"Antworte ausschließlich mit einem einzigen validen JSON-Objekt. "
|
||
"Keine Erklärung. Kein Markdown. Keine Code-Fences."
|
||
),
|
||
},
|
||
{
|
||
"role": "user",
|
||
"content": prompt,
|
||
},
|
||
],
|
||
"model": model_config["model_name"],
|
||
"max_tokens": model_config["max_tokens"],
|
||
"temperature": model_config["temperature"],
|
||
}
|
||
|
||
if model_config.get("use_response_format", False):
|
||
kwargs["response_format"] = {"type": "json_object"}
|
||
|
||
if model_config.get("extra_body") is not None:
|
||
kwargs["extra_body"] = model_config["extra_body"]
|
||
|
||
return client.chat.completions.create(**kwargs)
|
||
|
||
|
||
def run_inference(patient_text, model_config):
|
||
base_prompt = build_prompt(patient_text)
|
||
prompt = base_prompt
|
||
model_name = model_config["model_name"]
|
||
|
||
start = time.perf_counter()
|
||
|
||
raw_content = None
|
||
raw_response_debug = None
|
||
last_error = None
|
||
|
||
prompt_tokens = None
|
||
completion_tokens = None
|
||
total_tokens = None
|
||
|
||
for attempt in range(1, MAX_JSON_RETRIES + 2):
|
||
try:
|
||
response = make_chat_completion(model_config, prompt)
|
||
message = response.choices[0].message
|
||
raw_content = extract_message_content(message)
|
||
|
||
try:
|
||
raw_response_debug = response.model_dump()
|
||
except Exception:
|
||
raw_response_debug = str(response)
|
||
|
||
usage = getattr(response, "usage", None)
|
||
if usage is not None:
|
||
prompt_tokens = getattr(usage, "prompt_tokens", None)
|
||
completion_tokens = getattr(usage, "completion_tokens", None)
|
||
total_tokens = getattr(usage, "total_tokens", None)
|
||
|
||
parsed = extract_json_from_text(raw_content)
|
||
validated = validate_model_output(parsed)
|
||
|
||
if not validated.get("clinical_output_valid", False):
|
||
raise ValueError(
|
||
"Model output is logically invalid. "
|
||
f"klassifizierbar={validated.get('klassifizierbar')}, "
|
||
f"raw_EDSS={validated.get('raw_EDSS')}, "
|
||
f"EDSS_numeric={validated.get('EDSS_numeric')}, "
|
||
f"edss_logic_valid={validated.get('edss_logic_valid')}, "
|
||
f"missing_required={validated.get('missing_required_fields')}, "
|
||
f"missing_subcats={validated.get('missing_subcategory_fields')}"
|
||
)
|
||
|
||
return {
|
||
"success": True,
|
||
"error": None,
|
||
"last_error": last_error,
|
||
"model": model_name,
|
||
"result": validated,
|
||
"raw_parsed_output": parsed,
|
||
"raw_content": raw_content,
|
||
"raw_response_debug": None,
|
||
"inference_time_sec": time.perf_counter() - start,
|
||
"prompt_tokens": prompt_tokens,
|
||
"completion_tokens": completion_tokens,
|
||
"total_tokens": total_tokens,
|
||
}
|
||
|
||
except Exception as e:
|
||
last_error = str(e)
|
||
|
||
if attempt <= MAX_JSON_RETRIES:
|
||
prompt = f"""
|
||
Deine vorherige Antwort war ungültig.
|
||
|
||
Fehler:
|
||
{last_error}
|
||
|
||
Antworte erneut mit genau EINEM validen JSON-Objekt.
|
||
|
||
Strikte Regeln:
|
||
- Kein Markdown.
|
||
- Keine Code-Fences.
|
||
- Kein Text außerhalb des JSON.
|
||
- klassifizierbar muss true oder false sein.
|
||
- Wenn klassifizierbar=true: EDSS muss eine Zahl zwischen 0.0 und 10.0 sein.
|
||
- Wenn klassifizierbar=false: EDSS muss null sein.
|
||
- Verwende klassifizierbar=false nur, wenn keine vernünftige EDSS-Einschätzung möglich ist.
|
||
- Fehlende einzelne Unterkategorien sind kein Grund für klassifizierbar=false.
|
||
- Alle 8 subcategories-Schlüssel müssen vorhanden sein.
|
||
- Unterkategorien dürfen null sein, wenn nicht ausreichend ableitbar.
|
||
|
||
Ursprüngliche Aufgabe:
|
||
{base_prompt}
|
||
"""
|
||
time.sleep(RETRY_SLEEP_SEC)
|
||
continue
|
||
|
||
return {
|
||
"success": False,
|
||
"error": str(e),
|
||
"last_error": last_error,
|
||
"model": model_name,
|
||
"result": None,
|
||
"raw_parsed_output": None,
|
||
"raw_content": raw_content,
|
||
"raw_response_debug": raw_response_debug,
|
||
"inference_time_sec": time.perf_counter() - start,
|
||
"prompt_tokens": prompt_tokens,
|
||
"completion_tokens": completion_tokens,
|
||
"total_tokens": total_tokens,
|
||
}
|
||
|
||
|
||
# =========================
|
||
# FLATTEN / SAVE
|
||
# =========================
|
||
|
||
def flatten_result(record):
|
||
result = record.get("result") or {}
|
||
|
||
flat = {
|
||
"model": record.get("model"),
|
||
"iteration": record.get("iteration"),
|
||
"row_index": record.get("row_index"),
|
||
"row_number_in_run": record.get("row_number_in_run"),
|
||
"unique_id": record.get("unique_id"),
|
||
"MedDatum": record.get("MedDatum"),
|
||
|
||
"success": record.get("success"),
|
||
"error": record.get("error"),
|
||
"last_error": record.get("last_error"),
|
||
|
||
"json_parse_success": result.get("json_parse_success", False),
|
||
"required_fields_present": result.get("required_fields_present", False),
|
||
"required_schema_success": result.get("required_schema_success", False),
|
||
"clinical_range_valid": result.get("clinical_range_valid", False),
|
||
"clinical_output_valid": result.get("clinical_output_valid", False),
|
||
"klassifizierbar_is_bool": result.get("klassifizierbar_is_bool"),
|
||
"edss_logic_valid": result.get("edss_logic_valid"),
|
||
"all_functional_systems_valid_when_present": result.get("all_functional_systems_valid_when_present"),
|
||
|
||
"inference_time_sec": record.get("inference_time_sec"),
|
||
"prompt_tokens": record.get("prompt_tokens"),
|
||
"completion_tokens": record.get("completion_tokens"),
|
||
"total_tokens": record.get("total_tokens"),
|
||
|
||
"raw_content": record.get("raw_content"),
|
||
"raw_parsed_output": json.dumps(
|
||
record.get("raw_parsed_output"),
|
||
ensure_ascii=False
|
||
),
|
||
|
||
"reason": result.get("reason"),
|
||
"klassifizierbar": result.get("klassifizierbar"),
|
||
|
||
"raw_certainty_percent": result.get("raw_certainty_percent"),
|
||
"certainty_percent": result.get("certainty_percent"),
|
||
"certainty_present": result.get("certainty_present"),
|
||
"certainty_percent_is_numeric": result.get("certainty_percent_is_numeric"),
|
||
"certainty_percent_in_valid_range": result.get("certainty_percent_in_valid_range"),
|
||
|
||
"raw_EDSS": result.get("raw_EDSS"),
|
||
"EDSS_numeric": result.get("EDSS_numeric"),
|
||
"EDSS": result.get("EDSS"),
|
||
"EDSS_is_numeric": result.get("EDSS_is_numeric"),
|
||
"EDSS_in_valid_range": result.get("EDSS_in_valid_range"),
|
||
|
||
"missing_required_fields": json.dumps(
|
||
result.get("missing_required_fields", []),
|
||
ensure_ascii=False
|
||
),
|
||
"missing_subcategory_fields": json.dumps(
|
||
result.get("missing_subcategory_fields", []),
|
||
ensure_ascii=False
|
||
),
|
||
}
|
||
|
||
raw_subcats = result.get("raw_subcategories", {})
|
||
num_subcats = result.get("subcategories", {})
|
||
subcat_flags = result.get("subcategory_validation", {})
|
||
|
||
for name in FUNCTIONAL_SYSTEM_RANGES:
|
||
flags = subcat_flags.get(name, {})
|
||
|
||
flat[f"raw_subcat_{name}"] = raw_subcats.get(name)
|
||
flat[f"numeric_subcat_{name}"] = num_subcats.get(name)
|
||
flat[f"subcat_{name}_is_numeric"] = flags.get("is_numeric", False)
|
||
flat[f"subcat_{name}_in_valid_range"] = flags.get("in_valid_range", False)
|
||
flat[f"subcat_{name}_valid_when_present"] = flags.get("valid_when_present", False)
|
||
flat[f"subcat_{name}"] = num_subcats.get(name)
|
||
|
||
return flat
|
||
|
||
|
||
def append_jsonl(path, record):
|
||
with open(path, "a", encoding="utf-8") as f:
|
||
f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
||
f.flush()
|
||
os.fsync(f.fileno())
|
||
|
||
|
||
def append_csv(path, record):
|
||
one = pd.DataFrame([flatten_result(record)])
|
||
file_exists = Path(path).exists()
|
||
one.to_csv(path, mode="a", header=not file_exists, index=False)
|
||
|
||
|
||
def save_json(path, records):
|
||
with open(path, "w", encoding="utf-8") as f:
|
||
json.dump(records, f, indent=2, ensure_ascii=False)
|
||
|
||
|
||
def save_csv(path, records):
|
||
df = pd.DataFrame([flatten_result(r) for r in records])
|
||
df.to_csv(path, index=False)
|
||
|
||
|
||
def summarize_records(records):
|
||
df = pd.DataFrame([flatten_result(r) for r in records])
|
||
|
||
if df.empty:
|
||
return pd.DataFrame()
|
||
|
||
def rate(col):
|
||
if col not in df.columns:
|
||
return None
|
||
return df[col].fillna(False).astype(bool).mean()
|
||
|
||
summary = {
|
||
"model": df["model"].iloc[0],
|
||
"n_total": len(df),
|
||
"success_rate": rate("success"),
|
||
"json_parse_success_rate": rate("json_parse_success"),
|
||
"required_schema_success_rate": rate("required_schema_success"),
|
||
"clinical_output_valid_rate": rate("clinical_output_valid"),
|
||
"edss_logic_valid_rate": rate("edss_logic_valid"),
|
||
"EDSS_numeric_rate": rate("EDSS_is_numeric"),
|
||
"EDSS_valid_range_rate": rate("EDSS_in_valid_range"),
|
||
"klassifizierbar_true_rate": (
|
||
df["klassifizierbar"].fillna(False).astype(bool).mean()
|
||
if "klassifizierbar" in df.columns
|
||
else None
|
||
),
|
||
"mean_inference_time_sec": pd.to_numeric(
|
||
df["inference_time_sec"],
|
||
errors="coerce"
|
||
).mean(),
|
||
}
|
||
|
||
return pd.DataFrame([summary])
|
||
|
||
|
||
# =========================
|
||
# PARALLEL ROW PROCESSING
|
||
# =========================
|
||
|
||
def process_one_row(payload, model_config, iteration):
|
||
idx, row_dict, row_number, total_rows = payload
|
||
|
||
row = pd.Series(row_dict)
|
||
patient_text = build_patient_text(row)
|
||
|
||
record = run_inference(patient_text, model_config)
|
||
|
||
record["iteration"] = iteration
|
||
record["row_index"] = int(idx)
|
||
record["row_number_in_run"] = int(row_number)
|
||
record["unique_id"] = row.get("unique_id", f"row_{idx}")
|
||
record["MedDatum"] = row.get("MedDatum", None)
|
||
|
||
return record
|
||
|
||
|
||
# =========================
|
||
# MAIN
|
||
# =========================
|
||
|
||
if __name__ == "__main__":
|
||
|
||
run_timestamp = now_timestamp()
|
||
|
||
results_root = Path(RESULTS_ROOT)
|
||
run_root = results_root / f"run_{run_timestamp}"
|
||
run_root.mkdir(parents=True, exist_ok=True)
|
||
|
||
print(f"Results root: {run_root}")
|
||
|
||
df = pd.read_csv(INPUT_CSV, sep=";")
|
||
|
||
if MAX_ROWS is not None:
|
||
df = df.head(MAX_ROWS)
|
||
|
||
total_rows = len(df)
|
||
|
||
print(f"Loaded rows: {total_rows}")
|
||
print(f"Parallel workers: {PARALLEL_WORKERS}")
|
||
print(f"Batch size: {BATCH_SIZE}")
|
||
print(f"Iterations: {NUM_ITERATIONS}")
|
||
print(f"Models: {[m['model_name'] for m in MODEL_CONFIGS]}")
|
||
|
||
row_payloads = [
|
||
(idx, row.to_dict(), row_number, total_rows)
|
||
for row_number, (idx, row) in enumerate(df.iterrows(), start=1)
|
||
]
|
||
|
||
all_summaries = []
|
||
|
||
for model_config in MODEL_CONFIGS:
|
||
model_name = model_config["model_name"]
|
||
safe_model = safe_dir_name(model_name)
|
||
|
||
model_dir = run_root / safe_model
|
||
model_dir.mkdir(parents=True, exist_ok=True)
|
||
|
||
print("\n" + "#" * 80)
|
||
print(f"MODEL: {model_name}")
|
||
print(f"Saving to: {model_dir}")
|
||
print("#" * 80)
|
||
|
||
model_records = []
|
||
model_start = time.perf_counter()
|
||
|
||
for iteration in range(1, NUM_ITERATIONS + 1):
|
||
print("\n" + "=" * 80)
|
||
print(f"MODEL {model_name} | ITERATION {iteration}/{NUM_ITERATIONS}")
|
||
print("=" * 80)
|
||
|
||
iteration_start = time.perf_counter()
|
||
iteration_results = []
|
||
|
||
incremental_jsonl_path = model_dir / f"{safe_model}_iter_{iteration}_{run_timestamp}_incremental.jsonl"
|
||
incremental_csv_path = model_dir / f"{safe_model}_iter_{iteration}_{run_timestamp}_incremental.csv"
|
||
|
||
completed = 0
|
||
|
||
for batch_start in range(0, len(row_payloads), BATCH_SIZE):
|
||
batch = row_payloads[batch_start:batch_start + BATCH_SIZE]
|
||
|
||
print(
|
||
f"\nSubmitting rows {batch_start + 1}-"
|
||
f"{batch_start + len(batch)} / {total_rows}"
|
||
)
|
||
|
||
with ThreadPoolExecutor(max_workers=PARALLEL_WORKERS) as executor:
|
||
futures = [
|
||
executor.submit(
|
||
process_one_row,
|
||
payload,
|
||
model_config,
|
||
iteration
|
||
)
|
||
for payload in batch
|
||
]
|
||
|
||
for future in as_completed(futures):
|
||
record = future.result()
|
||
|
||
completed += 1
|
||
iteration_results.append(record)
|
||
model_records.append(record)
|
||
|
||
append_jsonl(incremental_jsonl_path, record)
|
||
append_csv(incremental_csv_path, record)
|
||
|
||
if record["success"]:
|
||
result = record.get("result") or {}
|
||
|
||
print(
|
||
f"Done {completed}/{total_rows} | "
|
||
f"row={record.get('row_number_in_run')} | "
|
||
f"klassifizierbar={result.get('klassifizierbar')} | "
|
||
f"EDSS={result.get('EDSS_numeric')} | "
|
||
f"edss_logic_valid={result.get('edss_logic_valid')} | "
|
||
f"clinical_output_valid={result.get('clinical_output_valid')} | "
|
||
f"time={record.get('inference_time_sec'):.2f}s"
|
||
)
|
||
|
||
else:
|
||
print(
|
||
f"Done {completed}/{total_rows} | "
|
||
f"row={record.get('row_number_in_run')} | "
|
||
f"ERROR={record.get('error')}"
|
||
)
|
||
|
||
if STOP_ON_FIRST_ERROR:
|
||
raise RuntimeError(record.get("error"))
|
||
|
||
iteration_results = sorted(
|
||
iteration_results,
|
||
key=lambda r: r.get("row_number_in_run", 10**9)
|
||
)
|
||
|
||
iter_json_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}.json"
|
||
iter_csv_path = model_dir / f"{safe_model}_results_iter_{iteration}_{run_timestamp}.csv"
|
||
|
||
save_json(iter_json_path, iteration_results)
|
||
save_csv(iter_csv_path, iteration_results)
|
||
|
||
elapsed = time.perf_counter() - iteration_start
|
||
|
||
print(f"\nIteration {iteration} complete.")
|
||
print(f"JSON: {iter_json_path}")
|
||
print(f"CSV: {iter_csv_path}")
|
||
print(f"Time: {elapsed / 60:.2f} min")
|
||
|
||
model_records = sorted(
|
||
model_records,
|
||
key=lambda r: (
|
||
r.get("iteration", 10**9),
|
||
r.get("row_number_in_run", 10**9)
|
||
)
|
||
)
|
||
|
||
model_json_path = model_dir / f"{safe_model}_all_results_{run_timestamp}.json"
|
||
model_csv_path = model_dir / f"{safe_model}_all_results_{run_timestamp}.csv"
|
||
model_summary_path = model_dir / f"{safe_model}_summary_{run_timestamp}.csv"
|
||
|
||
save_json(model_json_path, model_records)
|
||
save_csv(model_csv_path, model_records)
|
||
|
||
summary_df = summarize_records(model_records)
|
||
summary_df["model_total_wall_time_sec"] = time.perf_counter() - model_start
|
||
summary_df["model_total_wall_time_min"] = summary_df["model_total_wall_time_sec"] / 60
|
||
summary_df.to_csv(model_summary_path, index=False)
|
||
|
||
all_summaries.append(summary_df)
|
||
|
||
print(f"\nModel complete: {model_name}")
|
||
print(f"All JSON: {model_json_path}")
|
||
print(f"All CSV: {model_csv_path}")
|
||
print(f"Summary: {model_summary_path}")
|
||
|
||
if all_summaries:
|
||
combined = pd.concat(all_summaries, ignore_index=True)
|
||
combined_path = run_root / f"all_models_summary_{run_timestamp}.csv"
|
||
combined.to_csv(combined_path, index=False)
|
||
print(f"\nCombined summary: {combined_path}")
|
||
|
||
print("\nAll models and iterations completed.")
|
||
##
|
||
|