delete the test

experience branch of Voxtral
test
2026-01-24 15:58:33 +01:00 · 2026-01-24 14:40:12 +01:00 · 2026-01-24 14:38:12 +01:00 · 2026-01-16 14:49:53 +01:00 · 2026-01-16 14:40:11 +01:00 · 2026-01-16 14:36:55 +01:00
8 changed files with 3697 additions and 7 deletions
@@ -49,7 +49,7 @@ Thumbs.db
 # ======================
 # Experimental scripts
 # ======================
-2moodle.py
+#2moodle.py
-app.old_inside_pipeline.py
+#app.old_inside_pipeline.py
-feedback_bewertung.py
+#feedback_bewertung.py
@@ -1,6 +1,6 @@
 # 🩺 OSCE Feedback Evaluator
-This tool automates the feedback process for clinical exams. Just point it at a directory of files and let it do the heavy lifting.
+This tool automates the feedback process for OSCE clinical exams. Just point it at a directory of files and let it do the heavy lifting.
 ---
@@ -19,18 +19,20 @@ python3 -m venv venv
 source venv/bin/activate
 # (Windows users: .\venv\Scripts\activate)
-
+```
 ### 2. Get the Code
 Pull the latest changes from the repository:
 git pull https://github.com/Shahin-rmz/OSCE-Feedback-Evaluator.git
 ```bash
 git pull https://github.com/Shahin-rmz/OSCE-Feedback-Evaluator.git
 ```
 ### 3. Install Dependencies
 Make sure you are in the root directory where requirements.txt is located:
-Bash
+
 ```bash
 pip install -r requirements.txt
@@ -48,3 +50,15 @@ python3 osce_pipeline.py
 ### 5. Process your Data
 When prompted, paste the path of the directory containing the files the app should work on.
 ---
 ### 📝 Notes
 Make sure your .env file is set up inside the /pipeline folder if the script requires API keys or specific credentials.
 Use the .env.example file as a template if you're setting this up for the first time.
@@ -0,0 +1,191 @@
 # %% Feedback_Bewertung
 import openai
 import os
 from pathlib import Path
 # Credentials are taken from the environment so that no secret lives in the source tree.
 # When API_KEY is unset, None is passed and the OpenAI client falls back to its own
 # OPENAI_API_KEY lookup. NOTE(review): the key that was committed here should be revoked.
 client = openai.OpenAI(
    api_key=os.environ.get("API_KEY"),
    base_url=os.environ.get("BASE_URL", "http://pluto/v1"),
 )
 # System prompt (German) for the true/false rubric: nine numbered criteria, each to be
 # answered with a boolean `answer` and a short `explanation`, returned as JSON only.
 EVAL_PROMPT = """
 Du bist ein strenger, objektiver Bewertender. 
 Bewerte das folgende Feedback anhand der untenstehenden Kriterien.
 Für jedes Kriterium musst du GENAU folgendes liefern:
 - answer: true oder false  
 - explanation: eine kurze, klare Begründung auf Deutsch, basierend ausschließlich auf dem gegebenen Feedback
 KRITERIEN:
 1. Ist das Feedback zeitnah? (<48 Stunden)
 2. Enthält das Feedback narrative Kommentare?
 3. Ist der Ton positiv und/oder unterstützend?
 4. Beschreibt das Feedback beobachtbares Verhalten in klarer, eindeutiger Sprache?
 5. Verstärkt das Feedback, was gut gemacht wurde?
 6. Zeigt das Feedback Bereiche auf, in denen Verbesserungen nötig sind?
 7. Enthält das Feedback konkrete, spezifische Strategien zur Verbesserung?
 8. Bezieht sich das Feedback auf definierte Standards (z. B. EPA, Leistungsniveaus)?
 9. Passt der Anspruch/die Komplexität des Feedbacks zum Lernstand der Lernenden?
 Gib die Ergebnisse AUSSCHLIESSLICH als JSON gemäß dem bereitgestellten Schema zurück.
 """
 from pydantic import BaseModel
 class CriterionResult(BaseModel):
    """Verdict for a single criterion of the true/false rubric."""
    # Boolean verdict; the annotation also admits None (no verdict given).
    answer: bool | None
    # Short justification for the verdict (the prompt requests German text).
    explanation: str
 class FeedbackEvaluation(BaseModel):
    """Structured result for one transcript: one CriterionResult per rubric criterion.

    NOTE(review): the nine fields presumably map one-to-one, in order, onto criteria
    1-9 of EVAL_PROMPT; the prompt itself never spells out these field names.
    """
    timely: CriterionResult
    narrative_comments: CriterionResult
    positive_supportive_tone: CriterionResult
    clear_language: CriterionResult
    reinforces_strengths: CriterionResult
    identifies_improvements: CriterionResult
    includes_strategies: CriterionResult
    relates_to_standards: CriterionResult
    congruent_with_learner_level: CriterionResult
 ##
 # %% Main
 # Evaluate every transcript in input_dir with the structured-output endpoint and
 # store all verdicts in a single JSON file keyed by transcript file name.
 input_dir = "./cruscloud/AudioFeedbackProject/Transkripte"
 files = list(Path(input_dir).glob("*.txt"))
 results = {}
 for f in files:
    text = f.read_text(encoding="utf-8")
    response = client.responses.parse(
        model="GPT-OSS-120B",
        input=[
            {"role": "system", "content": EVAL_PROMPT},
            {"role": "user", "content": text},
        ],
        text_format=FeedbackEvaluation,
    )
    evaluation = response.output_parsed
    if evaluation is None:
        # No parsed object came back; skip the file instead of crashing later on
        # None.dict() and losing the results collected so far.
        print(f"Überspringe {f.name}: keine strukturierte Ausgabe erhalten")
        continue
    results[f.name] = evaluation
 # Save results
 import json
 with open("feedback_evaluations.json", "w", encoding="utf-8") as out:
    # ensure_ascii=False keeps German umlauts readable in the UTF-8 output file.
    json.dump({k: v.dict() for k, v in results.items()}, out, indent=4, ensure_ascii=False)
 ##
 # %% Feedback_Bewertung_Moodle
 import openai
 import os
 from pathlib import Path
 # Credentials are taken from the environment so that no secret lives in the source tree.
 # When API_KEY is unset, None is passed and the OpenAI client falls back to its own
 # OPENAI_API_KEY lookup. NOTE(review): the key that was committed here should be revoked.
 client = openai.OpenAI(
    api_key=os.environ.get("API_KEY"),
    base_url=os.environ.get("BASE_URL", "http://pluto/v1"),
 )
 # System prompt (German) for the course-based rubric: five criteria, each graded with a
 # score from 1 (best) to 5 (failed) plus a detailed explanation, returned as JSON only.
 # NOTE(review): the prompt refers to a "provided schema", but the field names expected
 # by the result model are not listed in this text — confirm the model can know them.
 EVAL_PROMPT = """
 Du bist ein strenger, objektiver Bewertender für medizinische Lehre.
 Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics".
 Für jedes Kriterium musst du GENAU folgendes liefern:
 - score: Zahl von 1 (beste Bewertung) bis 5 (gescheitert)
 - explanation: Ausführliche Begründung auf Deutsch mit:
  a) Zitat aus dem Kursmaterial das das Kriterium definiert
  b) Konkrete Analyse des Feedback-Textes mit Zitaten
  c) Klare Verknüpfung zwischen Kursanforderung und Feedback-Umsetzung
 KRITERIEN (basierend auf Kursmaterial):
 1. KONKRETHEIT (Bild 4: Merkmale von Feedback)
   "Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich."
   Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen?
 2. ICH-BOTSCHAFTEN (Feedbackregeln)
   "Ein Feedback … wird in „Ich-Botschaften“ ausgedrückt. … ist nicht (ab)wertend."
   Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt?
 3. STRUKTUR (WWW/BEB-Prinzip)
   WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..."
   BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..."
   Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)?
 4. WERTSCHÄTZUNG (Bild 4: Merkmale von Feedback)
   "Feedback ... auf Augenhöhe: Feedbackgeber und Feedbackempfänger gehen respektvoll und wertschätzend miteinander um."
   Bewertung: Wird respektvoll und auf Augenhöhe kommuniziert?
 5. VERBESSERUNGSEMPFEHLUNG (Feedbackregeln)
   "Ein Feedback … endet mit einer wertschätzenden Anregung für zukünftige Verbesserungen."
   Bewertung: Gibt es konkrete, wertschätzende Handlungsempfehlungen?
 SCORING-ANLEITUNG:
 1 = Vollständige Umsetzung (exzellentes Beispiel)
 2 = Gute Umsetzung mit minimalen Lücken
 3 = Grundlegende Umsetzung mit signifikanten Mängeln
 4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
 5 = Keine erkennbare Umsetzung (kriterienwidrig)
 Gib die Ergebnisse AUSSCHLIESSLICH als JSON gemäß dem bereitgestellten Schema zurück.
 """
 from pydantic import BaseModel
 class CriterionResult(BaseModel):
    """Graded result for a single criterion of the course-based rubric."""
    score: int  # 1-5 (1=best, 5=failed)
    # Justification for the score (the prompt requests a detailed German explanation).
    explanation: str
 class FeedbackEvaluation(BaseModel):
    """Structured result for one transcript: one CriterionResult per course criterion.

    NOTE(review): the five fields presumably correspond to criteria 1-5 of EVAL_PROMPT
    (concreteness, I-messages, structure, appreciation, improvement recommendation);
    the prompt does not state these field names.
    """
    konkretes_feedback: CriterionResult
    ich_botschaften: CriterionResult
    struktur: CriterionResult
    wertschaetzung: CriterionResult
    verbesserungsempfehlung: CriterionResult
 ##
 # %% Main
 # Evaluate every transcript in input_dir via chat completions in JSON mode and store
 # all validated results in a single JSON file keyed by transcript file name.
 import json
 input_dir = "./cruscloud/AudioFeedbackProject/Transkripte"
 files = list(Path(input_dir).glob("*.txt"))
 results = {}
 for f in files:
    text = f.read_text(encoding="utf-8")
    response = client.chat.completions.create(
        model="GPT-OSS-120B",
        messages=[
            {"role": "system", "content": EVAL_PROMPT},
            {"role": "user", "content": text},
        ],
        response_format={"type": "json_object"},
        temperature=0.1
    )
    # Parse and validate the JSON response. ValueError covers both json.JSONDecodeError
    # and pydantic's ValidationError; TypeError covers empty content or a non-object
    # payload. A bad answer for one file must not discard the whole batch.
    try:
        parsed_response = json.loads(response.choices[0].message.content)
        results[f.name] = FeedbackEvaluation(**parsed_response)
    except (ValueError, TypeError) as e:
        print(f"Überspringe {f.name}: ungültige Modellantwort ({e})")
        continue
 # Save results
 with open("feedback_evaluations.json", "w", encoding="utf-8") as out:
    json.dump({k: v.dict() for k, v in results.items()}, out, indent=4, ensure_ascii=False)
 ##
@@ -1,3 +1,6 @@
 API_KEY=YOUR_API_KEY
 BASE_URL=SERVER_ADDRESS
 # Optional
 OUTPUT_ADMIN_DIR=admin_dir
 OUTPUT_STUDENT_DIR=studi_dir
@@ -0,0 +1,404 @@
 # %% Isabella
 import openai
 import os
 from pathlib import Path
 import datetime
 import time
 import json as json_module
 import csv
 from pydantic import BaseModel
 import math
 # Read the credentials from the environment instead of editing placeholders in the
 # source. When a variable is unset, None is passed and the OpenAI client applies its
 # own defaults (OPENAI_API_KEY lookup and the standard base URL).
 client = openai.OpenAI(
    api_key=os.environ.get("API_KEY"),
    base_url=os.environ.get("BASE_URL"),
 )
 # System prompt (German) for the nine-criterion rubric (A1, A2, B1, B2, C1, D1, D2, E1,
 # F1). Each criterion is scored 1 (fully met) to 5 (not met); the prompt ends with the
 # exact JSON structure the model has to return.
 EVAL_PROMPT = '''
 Du bist ein strenger, objektiver Bewertender für medizinische Lehre. Bewerte das folgende Feedback anhand der Kursinhalte "Feedback in der Lehre: Basics". KRITERIEN (basierend auf Kursmaterial): A1 PERSPEKTIVE (Ich-Botschaften) Bewertung: Wird subjektive Wahrnehmung in Ich-Formulierungen dargestellt? A2 RESPEKT & WERTFREIHEIT Bewertung: Wird respektvoll und wertfrei kommuniziert? B1 KONKRETHEIT "Das Feedback sollte so konkret wie möglich sein. Die Wiedergabe beobachteter Beispiele ist hilfreich." Bewertung: Enthält das Feedback beobachtbare Beispiele statt Verallgemeinerungen? B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION Bewertung: Wird zwischen beobachtbaren Fakten und Interpretationen unterschieden? C1 STRUKTURIERTE LOGIK (WWW/BEB-Prinzip) WWW: "1. Wahrnehmung: Ich habe gesehen ... 2. Wirkung: ... das hat mich nervös gemacht. 3. Wunsch: Ich wünsche mir ..." BEB: "1. Beobachtung: Ich habe gesehen ... 2. Empfehlung: Ich empfehle ... 3. Begründung: Auf diese Weise vermeiden Sie ..." Bewertung: Folgt das Feedback einer klaren Struktur (WWW oder BEB)? D1 ZUKUNGSORIENTIERTE EMPFEHLUNG Bewertung: Gibt es konkrete, zukunftsorientierte Handlungsempfehlungen, die wertschätzend formuliert sind? D2 WERTSCHÄTZENDER ABSCHLUSS Bewertung: Schließt das Feedback wertschätzend ab? E1 KOMMUNIKATIONSEBENEN "Vier Seiten einer Nachricht: Sachinhalt, Selbstoffenbarung, Beziehung, Appell" Bewertung: Berücksichtigt das Feedback die verschiedenen Kommunikationsebenen? F1 FÖRDERUNG VON REFLEXION Bewertung: Fördert das Feedback die Reflexion und das Lernen?
 SCORING-ANLEITUNG:
 1 = Vollständige Umsetzung (exzellentes Beispiel)
 2 = Gute Umsetzung mit minimalen Lücken
 3 = Grundlegende Umsetzung mit signifikanten Mängeln
 4 = Unzureichende Umsetzung (wichtige Elemente fehlen)
 5 = Keine erkennbare Umsetzung (kriterienwidrig)
 WICHTIG: Gib die Ergebnisse AUSSCHLIESSLICH als JSON mit EXAKT folgender Struktur zurück:
 {
  "scores": {
    "A1": {"score": 1, "justification": "Begründung hier"},
    "A2": {"score": 1, "justification": "Begründung hier"},
    "B1": {"score": 1, "justification": "Begründung hier"},
    "B2": {"score": 1, "justification": "Begründung hier"},
    "C1": {"score": 1, "justification": "Begründung hier"},
    "D1": {"score": 1, "justification": "Begründung hier"},
    "D2": {"score": 1, "justification": "Begründung hier"},
    "E1": {"score": 1, "justification": "Begründung hier"},
    "F1": {"score": 1, "justification": "Begründung hier"}
  },
  "total_score": 0,
  "quality_level": "Beispiel-Qualitätsstufe",
  "strengths": ["Stärke 1", "Stärke 2"],
  "weaknesses": ["Schwäche 1", "Schwäche 2"],
  "improvement_suggestions": ["Vorschlag 1", "Vorschlag 2", "Vorschlag 3"]
 }
 '''
 class ScoreItem(BaseModel):
    """Score and justification for one rubric criterion."""
    score: int  # 1-5 (1=excellent, 5=failed)
    # Free-text reasoning for the score, as returned by the model.
    justification: str
 class EvaluationResult(BaseModel):
    """Complete evaluation of one transcript, mirroring the JSON structure in EVAL_PROMPT."""
    # Criterion id (e.g. "A1") -> score item.
    scores: dict[str, ScoreItem]
    # Total as reported in the response.
    total_score: int
    # Quality label as reported in the response.
    quality_level: str
    strengths: list[str]
    weaknesses: list[str]
    improvement_suggestions: list[str]
 ##
 # %% Main
 # Quality label for each whole-number score 1-5; key 0 holds the label used for
 # faulty or unknown results.
 QUALITY_LEVEL_MAP = dict(
    (
        (1, "Exzellent (1)"),
        (2, "Gut (2)"),
        (3, "Befriedigend (3)"),
        (4, "Ausreichend (4)"),
        (5, "Mangelhaft/Ungenügend (5)"),
        (0, "Fehlerhaft/Unbekannt"),
    )
 )
 # Directory the *.txt transcripts are read from.
 input_dir = "./cruscloud/Teil3/Transkripte/"
 # Hard-coded destination for reports, timing log and timing CSV; edit as needed.
 output_dir = "./cruscloud/Teil3/Evaluations_moodle_isabella2"
 Path(output_dir).mkdir(parents=True, exist_ok=True)
 # Start a fresh timing log (mode "w" truncates an existing file) with a dated banner.
 timing_log_path = Path(output_dir) / "evaluation_timing.log"
 with timing_log_path.open("w", encoding="utf-8") as log:
    banner_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    log.write(f"FEEDBACK EVALUATION TIMING LOG - {banner_time}\n")
    log.write("=" * 80 + "\n\n")
 # Start a fresh timing CSV that contains only the header row.
 csv_timing_path = Path(output_dir) / "evaluation_timings.csv"
 with csv_timing_path.open("w", encoding="utf-8", newline="") as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=",")
    # Fixed bookkeeping columns followed by one score column per criterion.
    csv_writer.writerow(
        [
            "Filename",
            "Total_Time_sec",
            "API_Evaluation_Time_sec",
            "Start_Time",
            "End_Time",
            "Status",
            "Average_Score",
            "Quality_Level",
        ]
        + [f"{criterion}_Score" for criterion in ("A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1")]
    )
 # Criterion identifiers in the order used by the prompt, the report and the CSV columns.
 _CRITERION_KEYS = ("A1", "A2", "B1", "B2", "C1", "D1", "D2", "E1", "F1")
 # Top-level keys a model response needs in order to be accepted without repair.
 _REQUIRED_RESPONSE_KEYS = ("scores", "total_score", "quality_level",
                            "strengths", "weaknesses", "improvement_suggestions")
 def _is_complete_response(parsed):
    """Tell whether the response has every required top-level key and every criterion score."""
    if not isinstance(parsed, dict):
        return False
    if not all(key in parsed for key in _REQUIRED_RESPONSE_KEYS):
        return False
    scores = parsed["scores"]
    return isinstance(scores, dict) and all(key in scores for key in _CRITERION_KEYS)
 def _repair_response(parsed):
    """Return a complete response dict; whatever the model omitted becomes an error placeholder."""
    fixed = {
        "scores": {
            key: {"score": 5, "justification": "FEHLER: Kriterium nicht bewertet"}
            for key in _CRITERION_KEYS
        },
        "total_score": 0,
        "quality_level": "Fehlerhaft",
        "strengths": ["Strukturfehler in der Bewertung"],
        "weaknesses": ["Antwortstruktur nicht korrekt"],
        "improvement_suggestions": ["Überprüfen Sie die Feedback-Struktur"],
    }
    if not isinstance(parsed, dict):
        return fixed
    # Take over every usable piece of the model's answer.
    raw_scores = parsed.get("scores")
    if isinstance(raw_scores, dict):
        for key in _CRITERION_KEYS:
            if key in raw_scores:
                fixed["scores"][key] = raw_scores[key]
    if "total_score" in parsed:
        fixed["total_score"] = parsed["total_score"]
    if parsed.get("quality_level"):
        fixed["quality_level"] = parsed["quality_level"]
    for list_key in ("strengths", "weaknesses", "improvement_suggestions"):
        if isinstance(parsed.get(list_key), list):
            fixed[list_key] = parsed[list_key]
    return fixed
 def _fallback_evaluation(explanation, quality_level, weakness, suggestion):
    """Build a worst-score placeholder evaluation for a file that could not be evaluated."""
    return EvaluationResult(
        scores={key: ScoreItem(score=5, justification=explanation) for key in _CRITERION_KEYS},
        total_score=0,
        quality_level=quality_level,
        strengths=["Bewertung fehlgeschlagen"],
        weaknesses=[weakness],
        improvement_suggestions=[suggestion],
    )
 # Evaluate each transcript with a single API call, then write a text report, a CSV
 # row and a timing-log entry for it.
 files = list(Path(input_dir).glob("*.txt"))
 results = {}
 total_start = time.time()
 for f in files:
    file_start = time.time()
    # Captured once so that console, CSV and log all show the same start time.
    started_at = datetime.datetime.now()
    start_time_str = started_at.strftime('%Y-%m-%d %H:%M:%S')
    print(f"\n{'='*50}")
    print(f"Beginne Bewertung: {f.name}")
    print(f"Startzeit: {started_at.strftime('%H:%M:%S')}")
    # Read input text
    text = f.read_text(encoding="utf-8")
    status = "Success"
    eval_duration = 0.0
    # All criteria are evaluated in one call that must return the strict JSON structure.
    try:
        eval_start_api = time.time()
        response = client.chat.completions.create(
            model="GPT-OSS-120B",
            messages=[
                {"role": "system", "content": EVAL_PROMPT},
                {"role": "user", "content": text},
            ],
            response_format={"type": "json_object"},
            temperature=0.1
        )
        eval_duration = time.time() - eval_start_api  # pure API wait time
        print(f"  • Gesamtbewertung (API-Laufzeit): {eval_duration:.2f} sec")
        try:
            parsed_response = json_module.loads(response.choices[0].message.content)
            # Repair the structure when top-level keys OR single criterion scores are
            # missing; the report below indexes every criterion and would otherwise
            # fail with a KeyError.
            if not _is_complete_response(parsed_response):
                print("  ! Warnung: Ungewöhnliche Antwortstruktur erkannt. Versuche Konvertierung...")
                status = "Partial Structure"
                parsed_response = _repair_response(parsed_response)
            evaluation = EvaluationResult(**parsed_response)
        except json_module.JSONDecodeError as e:
            print(f"  ! JSON-Decoding-Fehler: {e}")
            print(f"  ! Antwortinhalt: {response.choices[0].message.content[:200]}...")
            status = f"JSON Error: {str(e)}"
            evaluation = _fallback_evaluation(
                explanation=f"FEHLER: Ungültige JSON-Antwort vom API. Details: {str(e)}",
                quality_level="Fehlerhaft",
                weakness="Ungültiges JSON-Format",
                suggestion="Überprüfen Sie die Feedback-Struktur",
            )
    except Exception as e:
        # Boundary handler: any API or validation failure yields a placeholder result
        # so the remaining files are still processed.
        print(f"  ! Unerwarteter Fehler: {str(e)}")
        status = f"API Error: {str(e)}"
        evaluation = _fallback_evaluation(
            explanation=f"FEHLER: Bewertung fehlgeschlagen. Details: {str(e)}",
            quality_level="Systemfehler",
            weakness=f"Technischer Fehler: {str(e)}",
            suggestion="Kontaktieren Sie den Support",
        )
    results[f.name] = evaluation
    # Calculate the AVERAGE score (not sum), rounded to a whole number.
    all_scores = [item.score for item in evaluation.scores.values()]
    valid_scores = [s for s in all_scores if isinstance(s, int)]
    average_score = sum(valid_scores) / len(valid_scores) if valid_scores else 5.0
    rounded_average_score = int(round(average_score))
    # Keep the error/partial label for every run that did not succeed. The status
    # strings carry details ("JSON Error: ..."), so they are compared against
    # "Success" rather than matched against exact error names.
    if status != "Success":
        final_quality_level = evaluation.quality_level
    else:
        # Assign the defined quality level for the rounded average.
        final_quality_level = QUALITY_LEVEL_MAP.get(rounded_average_score, "Fehlerhaft/Unbekannt")
    evaluation.quality_level = final_quality_level
    # Total time for this file up to this point.
    total_file_duration = time.time() - file_start
    # Local processing time: total time minus the pure API wait, never negative.
    local_processing_time = total_file_duration - eval_duration
    if local_processing_time < 0:
        local_processing_time = 0.0
    # Generate detailed text report with timing
    report = f'''FEEDBACK-EVALUATION BERICHT
 ============================
 Eingabedatei: {f.name}
 Erstellungsdatum: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 Kursreferenz: "Feedback in der Lehre: Basics" (Hochschulmedizin Dresden)
 VERARBEITUNGSZEITEN
 ----------------------------------------
 Gesamtverarbeitung: {total_file_duration:.2f} Sekunden
  • API-Bewertungszeit: {eval_duration:.2f} Sekunden
  • Lokale Verarbeitungszeit (Lesen, JSON, Bericht): {local_processing_time:.2f} Sekunden
 '''
    # Add evaluation results
    report += f'''
 KRITERIENBEWERTUNG
 ----------------------------------------
 A1 PERSPEKTIVE (Ich-Botschaften): {evaluation.scores['A1'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['A1'].justification}
 A2 RESPEKT & WERTFREIHEIT: {evaluation.scores['A2'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['A2'].justification}
 B1 KONKRETHEIT: {evaluation.scores['B1'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['B1'].justification}
 B2 TRENNUNG VON BEOBACHTUNG UND INTERPRETATION: {evaluation.scores['B2'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['B2'].justification}
 C1 STRUKTURIERTE LOGIK (WWW/BEB): {evaluation.scores['C1'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['C1'].justification}
 D1 ZUKUNGSORIENTIERTE EMPFEHLUNG: {evaluation.scores['D1'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['D1'].justification}
 D2 WERTSCHÄTZENDER ABSCHLUSS: {evaluation.scores['D2'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['D2'].justification}
 E1 KOMMUNIKATIONSEBENEN: {evaluation.scores['E1'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['E1'].justification}
 F1 FÖRDERUNG VON REFLEXION: {evaluation.scores['F1'].score}/5
 (1=exzellent, 5=nicht bestanden)
 Begründung: {evaluation.scores['F1'].justification}
 GESAMTBEWERTUNG
 ----------------------------------------
 Durchschnittliche Bewertung: {rounded_average_score}/5
 (1=exzellent, 5=nicht bestanden)
 Qualitätsstufe: {evaluation.quality_level}
 Stärken:
 '''
    for strength in evaluation.strengths:
        report += f"- {strength}\n"
    report += "\nSchwächen:\n"
    for weakness in evaluation.weaknesses:
        report += f"- {weakness}\n"
    report += "\nVerbesserungsvorschläge:\n"
    for suggestion in evaluation.improvement_suggestions:
        report += f"- {suggestion}\n"
    # Save report to output directory
    output_path = Path(output_dir) / f"{f.stem}_evaluation.txt"
    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(report)
    # Append this file's timing and score row to the CSV
    with open(csv_timing_path, "a", encoding="utf-8", newline="") as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=",")
        csv_writer.writerow([
            f.name,
            f"{total_file_duration:.2f}",
            f"{eval_duration:.2f}",
            start_time_str,
            datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            status,
            rounded_average_score,
            evaluation.quality_level,
            *[evaluation.scores[key].score for key in _CRITERION_KEYS],
        ])
    # Append timing to the central log file
    with open(timing_log_path, "a", encoding="utf-8") as log:
        log.write(f"Datei: {f.name}\n")
        # Log the captured start time, not the moment the entry is written.
        log.write(f"Start: {started_at.strftime('%H:%M:%S')}\n")
        log.write(f"Dauer: {total_file_duration:.2f} Sekunden\n")
        log.write("Detailierte Zeiten:\n")
        log.write(f"  • API-Bewertung: {eval_duration:.2f} Sekunden\n")
        log.write(f"  • Lokale Verarbeitung: {local_processing_time:.2f} Sekunden\n")
        log.write("-"*50 + "\n\n")
    print(f"\nBewertungsbericht erstellt: {output_path}")
    print(f"Gesamtzeit für {f.name}: {total_file_duration:.2f} Sekunden (API: {eval_duration:.2f}, Lokal: {local_processing_time:.2f})")
    print(f"{'='*50}")
 total_duration = time.time() - total_start
 print(f"\n{'='*50}")
 print("ALLE BEWERTUNGEN ABGESCHLOSSEN")
 print(f"Gesamtverarbeitungszeit: {total_duration:.2f} Sekunden für {len(files)} Dateien")
 # No per-file average when the input directory held no transcripts (avoids division by zero).
 if files:
    print(f"Durchschnittliche Zeit pro Datei: {total_duration/len(files):.2f} Sekunden")
 print(f"Bewertungsberichte gespeichert in: {output_dir}")
 print(f"Timing-Log aktualisiert: {timing_log_path}")
 print(f"CSV-Timing-Datei erstellt: {csv_timing_path}")
 print(f"{'='*50}")
 ##
Author	SHA1	Message	Date
shahin	4a0d398a91	delete the test	2026-01-24 15:58:33 +01:00
shahin	9ef1815b84	experience branch of Voxtral	2026-01-24 14:40:12 +01:00
shahin	28ce0c7875	test	2026-01-24 14:38:12 +01:00
shahin	67769e10ae	Small tweaks	2026-01-16 14:49:53 +01:00
shahin	f2b30c88e2	Security	2026-01-16 14:40:11 +01:00
shahin	8c640c1cf6	Add Feedbacker	2026-01-16 14:36:55 +01:00
shahin	f65b12fc12	env.example update	2026-01-06 21:02:05 +01:00
shahin	25c5be587f	adding init	2026-01-06 18:18:06 +01:00
Shahin	93aaa72e3c	Update README.md	2026-01-06 17:28:41 +01:00
Shahin	fde4dd57fe	Update README with .env file setup instructions Added notes about .env file setup for the app.	2026-01-06 17:26:29 +01:00
Shahin	7e5e8abc63	Fix formatting in README for code blocks	2026-01-06 17:11:24 +01:00
Shahin	3fc4f1a55c	Update README to include git pull command	2026-01-06 17:09:25 +01:00
Shahin	56adbaf443	Update README.md	2026-01-06 17:02:36 +01:00