68 lines
2 KiB
Python
68 lines
2 KiB
Python
import whisper
|
|
import numpy as np
|
|
import subprocess
|
|
import time
|
|
import sys
|
|
from config import RTSP_URL, MISTY_IP
|
|
|
|
def run_test_inference():
    """Record roughly 10 seconds of audio from the RTSP stream and transcribe it.

    Steps:
      1. Load the Whisper "base" model.
      2. Start an ffmpeg subprocess that reads ``RTSP_URL`` over TCP and
         writes mono, 16 kHz, signed 16-bit little-endian PCM to its stdout.
      3. Collect that PCM output until 10 seconds have elapsed or ffmpeg
         closes its stdout, whichever happens first.
      4. Transcribe the collected audio as German and print the text.

    If no audio was received, ffmpeg's stderr log is printed instead.

    Returns:
        None. All results and errors are reported on stdout.
    """
    print("--- Finaler Test-Lauf (SDK-Struktur) ---")

    # Load the model.
    print("Lade Whisper-Modell...")
    model = whisper.load_model("base")

    # ffmpeg command with more "patience" (analyzeduration & probesize).
    # This helps when Misty is slow to start the stream.
    command = [
        'ffmpeg',
        '-rtsp_transport', 'tcp',
        '-analyzeduration', '5000000',
        '-probesize', '5000000',
        '-i', RTSP_URL,
        '-ar', '16000',
        '-ac', '1',
        '-f', 's16le',
        '-',
    ]

    print(f"\nVersuche Verbindung zu: {RTSP_URL}")
    # Start the process. stderr is captured so it can be shown if no audio
    # arrives; it is only read in that failure path.
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    pcm_chunks = []
    print("[MISTY HÖRT ZU] Sprich jetzt... (Sammle 10 Sekunden Audio)")

    start_time = time.time()
    try:
        # Simply collect for 10 seconds to fill the buffer.
        while time.time() - start_time < 10:
            raw_chunk = process.stdout.read(3200)
            if not raw_chunk:
                # Empty read means EOF: ffmpeg exited or closed stdout.
                # Stop here instead of spinning until the deadline.
                break
            pcm_chunks.append(raw_chunk)
            sys.stdout.write(".")
            sys.stdout.flush()

        process.terminate()

        # Join the raw bytes first and drop a trailing odd byte, so that
        # np.frombuffer always sees a whole number of 16-bit samples
        # (a short final read could otherwise raise ValueError).
        pcm_bytes = b"".join(pcm_chunks)
        pcm_bytes = pcm_bytes[:len(pcm_bytes) - (len(pcm_bytes) % 2)]

        if not pcm_bytes:
            # Nothing arrived, so look at ffmpeg's error log.
            _, stderr = process.communicate()
            # errors="replace" keeps undecodable bytes from hiding the log.
            print(f"\n❌ FFmpeg Fehler-Log:\n{stderr.decode(errors='replace')}")
            return

        print("\n\nAnalyse startet...")
        # Scale int16 samples to float32 in [-1.0, 1.0) before transcription.
        full_audio = np.frombuffer(pcm_bytes, dtype=np.int16).astype(np.float32) / 32768.0
        result = model.transcribe(full_audio, language="de")

        print("\n" + "="*40)
        print(f"ERGEBNIS: {result['text'].strip()}")
        print("="*40 + "\n")

    except Exception as e:
        print(f"Fehler: {e}")
    finally:
        # Make sure the child is gone, then reap it and release the pipes so
        # no zombie process or open file descriptors are left behind.
        process.kill()
        process.wait()
        for pipe in (process.stdout, process.stderr):
            if pipe is not None:
                pipe.close()
|
|
|
|
# Script entry point: run the test inference only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_test_inference()
|