-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.py
77 lines (58 loc) · 2.07 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import base64
import json
import os
import subprocess

import cld3
import torch
from flask import Flask, request, send_file, jsonify
from transformers import pipeline

# Flask application object; the @app.route decorators in this file register
# their handlers on it.
app = Flask(__name__)
def remember_language(lang: str):
    """Store *lang* in the ``last-language`` file, overwriting its content."""
    with open('last-language', 'w') as state_file:
        state_file.write(lang)
def last_language() -> str | None:
if not os.path.exists('last-language'):
return None
with open('last-language', 'r') as f:
return f.readline()
# Run on the first CUDA device when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Speech-recognition pipeline, built once at import time and shared by the
# transcription route.
PIPE = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium",
    chunk_length_s=30,
    device=device,
)
@app.route('/v1/images/generations', methods=['POST'])
def image():
    """Generate an image for the posted prompt and return it base64-encoded.

    Expects a JSON body with a string ``prompt`` field. Runs
    ``./gen_image.sh <prompt>``, then reads ``image.png`` from the working
    directory (presumably written by that script -- confirm against the
    script) and returns ``{"data": [{"b64_json": ...}]}``.

    Returns:
        A JSON response with the encoded image, or a 400 JSON error when the
        body has no string ``prompt``.
    """
    data = request.get_json()  # Get the JSON input from the POST request
    if not isinstance(data, dict) or not isinstance(data.get('prompt'), str):
        return jsonify({"error": "JSON body must contain a string 'prompt' field"}), 400
    prompt = data['prompt']  # Extract the 'prompt' field

    # Pass the prompt as a single argv entry instead of interpolating it into
    # a shell command line: the prompt is untrusted and could otherwise
    # inject arbitrary shell commands (quotes, $(...), backticks).
    result = subprocess.run(["./gen_image.sh", prompt])
    if result.returncode != 0:
        # The exit status was ignored before; keep serving whatever
        # image.png exists, but leave a trace for debugging.
        app.logger.warning("gen_image.sh exited with status %s", result.returncode)

    # Context manager so the file handle is closed after reading.
    with open('image.png', 'rb') as image_file:
        image_bytes = image_file.read()

    b64img = base64.encodebytes(image_bytes).decode('utf-8')
    return jsonify({"data": [{'b64_json': b64img}]})
@app.route('/v1/audio/speech', methods=['POST'])
def speech():
    """Synthesize speech for the posted text and return it as a WAV file.

    Expects a JSON body with a string ``input`` field. The language is
    detected with cld3:

    * a reliable detection is used and stored via ``remember_language``;
    * otherwise the last remembered language is preferred, and the
      unreliable guess is used only when nothing has been remembered.

    The text is written to ``textfile.txt`` and ``./tts.sh <lang>`` is run;
    ``output.wav`` (presumably produced by that script -- confirm against
    the script) is sent back as the response.

    Returns:
        The WAV file response, or a 400 JSON error when ``input`` is missing
        or no language can be determined.
    """
    data = request.get_json()  # Get the JSON input from the POST request
    if not isinstance(data, dict) or not isinstance(data.get('input'), str):
        return jsonify({"error": "JSON body must contain a string 'input' field"}), 400
    text = data['input']  # Extract the 'input' field

    # Guard against a missing prediction (e.g. for empty text) rather than
    # failing with AttributeError on the attribute accesses below.
    prediction = cld3.get_language(text)
    if prediction is not None and prediction.is_reliable:
        lang = prediction.language
        remember_language(lang)
    else:
        # The original compared the *function* ``last_language`` to None,
        # which is always unequal, so ``lang`` became None whenever no
        # language had been remembered. Compare the returned value instead.
        remembered = (last_language() or "").strip()
        guessed = prediction.language if prediction is not None else None
        lang = remembered or guessed
    if not lang:
        return jsonify({"error": "could not determine the language of 'input'"}), 400

    with open('textfile.txt', 'w') as f:  # Write the extracted text to a file
        f.write(text)

    # Argv list instead of a shell string, so the language tag can never be
    # interpreted by a shell.
    result = subprocess.run(["./tts.sh", lang])
    if result.returncode != 0:
        app.logger.warning("tts.sh exited with status %s", result.returncode)

    # Return the audio file in the HTTP response
    return send_file('output.wav', mimetype='audio/x-wav')
@app.route('/v1/audio/transcriptions', methods=['POST'])
def transcribe():
    """Transcribe the uploaded ``file`` form part with the shared ASR pipeline.

    The upload is saved as ``upload.wav`` in the working directory, passed to
    ``PIPE``, and the pipeline result is returned JSON-serialized with
    status 200.
    """
    upload_path = 'upload.wav'
    request.files['file'].save(upload_path)
    result = PIPE(upload_path)
    return json.dumps(result), 200
# Set the configuration before the server starts: app.run() blocks until the
# server shuts down, so an assignment placed after it never takes effect
# while requests are being served.
app.config['UPLOAD_FOLDER'] = '.'

if __name__ == '__main__':
    # Development server, listening on all interfaces on port 5000.
    app.run(port=5000, host="0.0.0.0")