Adjust the script to use the new data, prompt, and results paths

2026-05-19 10:13:29 +02:00
parent 98df7c70f1
commit bb9fcf20ae
2 changed files with 71 additions and 455 deletions
@@ -1258,7 +1258,7 @@


 # %% API call - Multi-model, multi-iteration EDSS + timing/resource benchmark
-#
+
 #import time
 #import json
 #import os
@@ -1279,9 +1279,9 @@
 #    print("Install with: pip install psutil")
 #
 #
-## =========================
-## CONFIGURATION
-## =========================
+# =========================
+# CONFIGURATION
+# =========================
 #
 #load_dotenv()
 #
@@ -1289,21 +1289,21 @@
 #OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
 #
 #MODEL_CONFIGS = [
-##    {
-##        "model_name": "qwen3.6-35b-a3b",
-##        "use_response_format": False,
-##        "temperature": 0.0,
-##        "max_tokens": 4096,
-##
-##        # If your backend is vLLM / Qwen chat-template compatible,
-##        # this may reduce long hidden reasoning and JSON truncation.
-##        # If your server errors because of extra_body, set this to None.
-##        "extra_body": {
-##            "chat_template_kwargs": {
-##                "enable_thinking": False
-##            }
-##        },
-##    },
+#    {
+#        "model_name": "qwen3.6-35b-a3b",
+#        "use_response_format": False,
+#        "temperature": 0.0,
+#        "max_tokens": 4096,
+#
+#        # If your backend is vLLM / Qwen chat-template compatible,
+#        # this may reduce long hidden reasoning and JSON truncation.
+#        # If your server errors because of extra_body, set this to None.
+#        "extra_body": {
+#            "chat_template_kwargs": {
+#                "enable_thinking": False
+#            }
+#        },
+#    },
 #    {
 #        "model_name": "gemma-4-31B-it",
 #        "use_response_format": False,
@@ -1328,10 +1328,10 @@
 #NUM_ITERATIONS = 10
 #STOP_ON_FIRST_ERROR = False
 #
-## For testing, set to e.g. 2.
-## For full run, set to None.
+# For testing, set to e.g. 2.
+# For full run, set to None.
 #MAX_ROWS = 2
-## MAX_ROWS = 2
+# MAX_ROWS = 2
 #
 #MAX_TOKENS = 4096
 #TEMPERATURE = 0.0
@@ -1340,14 +1340,14 @@
 #
 #SAVE_EVERY_N_ROWS = 1
 #
-## Retries for invalid JSON / truncated JSON
+# Retries for invalid JSON / truncated JSON
 #MAX_JSON_RETRIES = 2
 #RETRY_SLEEP_SEC = 2
 #
 #
-## =========================
-## CLIENT
-## =========================
+# =========================
+# CLIENT
+# =========================
 #
 #client = OpenAI(
 #    api_key=OPENAI_API_KEY,
@@ -1355,9 +1355,9 @@
 #)
 #
 #
-## =========================
-## HELPERS
-## =========================
+# =========================
+# HELPERS
+# =========================
 #
 #def safe_dir_name(name: str) -> str:
 #    name = str(name).strip()
@@ -1433,9 +1433,9 @@
 #        return max(self.samples_mb)
 #
 #
-## =========================
-## JSON EXTRACTION
-## =========================
+# =========================
+# JSON EXTRACTION
+# =========================
 #
 #def extract_json_from_text(text):
 #    if text is None:
@@ -1587,17 +1587,17 @@
 #    return None
 #
 #
-## =========================
-## READ INSTRUCTIONS
-## =========================
+# =========================
+# READ INSTRUCTIONS
+# =========================
 #
 #with open(EDSS_INSTRUCTIONS_PATH, "r", encoding="utf-8") as f:
 #    EDSS_INSTRUCTIONS = f.read().strip()
 #
 #
-## =========================
-## PROMPT
-## =========================
+# =========================
+# PROMPT
+# =========================
 #
 #def build_prompt(patient_text):
 #    return f'''Du bist ein medizinischer Assistent für EDSS-Extraktion aus klinischen Berichten.
@@ -1676,9 +1676,9 @@
 #'''
 #
 #
-## =========================
-## VALIDATION / NORMALIZATION
-## =========================
+# =========================
+# VALIDATION / NORMALIZATION
+# =========================
 #
 #def normalize_model_output(parsed):
 #    if not isinstance(parsed, dict):
@@ -1755,9 +1755,9 @@
 #    return parsed
 #
 #
-## =========================
-## API CALL
-## =========================
+# =========================
+# API CALL
+# =========================
 #
 #def make_chat_completion(model_config, prompt):
 #    model_name = model_config["model_name"]
@@ -1794,9 +1794,9 @@
 #    return client.chat.completions.create(**kwargs)
 #
 #
-## =========================
-## INFERENCE FUNCTION WITH RETRIES
-## =========================
+# =========================
+# INFERENCE FUNCTION WITH RETRIES
+# =========================
 #
 #def run_inference(patient_text, model_config):
 #    model_name = model_config["model_name"]
@@ -1912,9 +1912,9 @@
 #    }
 #
 #
-## =========================
-## BUILD PATIENT TEXT
-## =========================
+# =========================
+# BUILD PATIENT TEXT
+# =========================
 #
 #def build_patient_text(row):
 #    return (
@@ -1925,9 +1925,9 @@
 #    )
 #
 #
-## =========================
-## FLATTEN RESULTS FOR CSV
-## =========================
+# =========================
+# FLATTEN RESULTS FOR CSV
+# =========================
 #
 #def flatten_result(record):
 #    flat = {
@@ -2012,9 +2012,9 @@
 #    return pd.DataFrame([summary])
 #
 #
-## =========================
-## INCREMENTAL SAVE HELPERS
-## =========================
+# =========================
+# INCREMENTAL SAVE HELPERS
+# =========================
 #
 #def append_jsonl(path, record):
 #    with open(path, "a", encoding="utf-8") as f:
@@ -2030,9 +2030,9 @@
 #    df_one.to_csv(path, mode="a", header=not file_exists, index=False)
 #
 #
-## =========================
-## MAIN LOOP
-## =========================
+# =========================
+# MAIN LOOP
+# =========================
 #
 #if __name__ == "__main__":
 #
@@ -2277,26 +2277,26 @@ MODEL_CONFIGS = [
        "max_tokens": 4096,
        "extra_body": None,
    },
-    # {
-    #     "model_name": "GPT-OSS-120B",
-    #     "use_response_format": True,
-    #     "temperature": 0.0,
-    #     "max_tokens": 4096,
-    #     "extra_body": None,
-    # },
+    {
+        "model_name": "GPT-OSS-120B",
+        "use_response_format": True,
+        "temperature": 0.0,
+        "max_tokens": 4096,
+        "extra_body": None,
+    },
 ]

-INPUT_CSV = "/home/shahin/Lab/Doktorarbeit/Barcelona/Data/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
-EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/attach/Komplett.txt"
+INPUT_CSV ="/home/shahin/Lab/Doktorarbeit/Barcelona/data/processed/MS_Briefe_400_with_unique_id_SHA3_explore_cleaned_unique.csv"
+EDSS_INSTRUCTIONS_PATH = "/home/shahin/Lab/Doktorarbeit/Barcelona/prompts/Komplett.txt"

-RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results_edss_benchmark"
+RESULTS_ROOT = "/home/shahin/Lab/Doktorarbeit/Barcelona/results/benchmark_runs"

-NUM_ITERATIONS = 10
+NUM_ITERATIONS = 2
 STOP_ON_FIRST_ERROR = False

 # For testing, set to e.g. 2.
 # For full run, set to None.
-MAX_ROWS = None
+MAX_ROWS = 2
 # MAX_ROWS = 2

 MAX_TOKENS = 4096