init

2024-03-06 11:53:36 +01:00
commit 73ddf4661c
9 changed files with 436 additions and 0 deletions
--- a/app.py
+++ b/app.py
@@ -0,0 +1,236 @@
+from flask import session, Flask, render_template, request, jsonify, redirect, url_for, session, send_from_directory, make_response
+import requests
+import os
+import re
+import markdown
+import yt_dlp
+from datetime import datetime
+from werkzeug.utils import secure_filename
+from dotenv import load_dotenv
+
+app = Flask(__name__)
+app.config['UPLOAD_FOLDER'] = 'uploads/'
+# Upper bound on the size of an incoming request body.
+app.config['MAX_CONTENT_LENGTH'] = 32 * 1024 * 1024  # 32 MB limit
+
+# Values already present in the process environment win; the .env file is
+# consulted only when at least one of the two settings is missing.
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
+FLASK_SECRET_KEY = os.getenv('FLASK_SECRET_KEY')
+
+if not (OPENAI_API_KEY and FLASK_SECRET_KEY):
+    # load_dotenv is imported at the top of the file.
+    load_dotenv()
+    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
+    FLASK_SECRET_KEY = os.getenv('FLASK_SECRET_KEY')
+
+# Fail at import time: neither the OpenAI calls nor the session can work
+# without these two values.
+if not (OPENAI_API_KEY and FLASK_SECRET_KEY):
+    raise RuntimeError("FLASK_SECRET_KEY or OPENAI_API_KEY not found in the environment or .env file")
+
+
+# The secret key signs the session used by the views below.
+app.secret_key = FLASK_SECRET_KEY
+
+
+# Make sure both working directories exist before any request is served.
+os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+downloads_path = os.path.join(app.static_folder, 'downloads')
+os.makedirs(downloads_path, exist_ok=True)
+
+
+
+@app.route('/youtube', methods=['POST'])
+def download_youtube_audio():
+    """Download the audio of a YouTube video, transcribe it and store the result.
+
+    Reads ``youtube_url`` and the optional ``summarize`` flag from the posted
+    form. On success the transcript (summarized when requested) is saved in
+    the session under ``youtube_transcript`` and the client is redirected to
+    the index page. When the download fails a plain-text message is returned.
+    """
+    youtube_url = request.form['youtube_url']
+    # Form values are strings and any non-empty string is truthy (including
+    # the default 'false'), so the flag must be compared explicitly.
+    summarize_flag = request.form.get('summarize', 'false').strip().lower()
+    should_summarize = summarize_flag in ('true', '1', 'on', 'yes')
+
+    audio_file_path = download_audio_from_youtube(youtube_url)
+    if not audio_file_path:
+        return "Failed to download the audio from the provided YouTube URL."
+
+    transcription = transcribe_audio(audio_file_path)
+    if should_summarize:
+        transcription = summarize(transcription)
+    session['youtube_transcript'] = transcription
+    return redirect(url_for('index'))
+
+def download_audio_from_youtube(url):
+    """Download the best available audio stream of *url* with yt-dlp.
+
+    The file is written to the configured upload folder and named after the
+    video id and the extension yt-dlp selects.
+
+    Args:
+        url: Address of the video to fetch.
+
+    Returns:
+        The path of the downloaded file, or ``None`` when the download failed.
+    """
+    ydl_opts = {
+        'format': 'bestaudio/best',
+        # Use the configured upload folder rather than repeating its name here.
+        'outtmpl': os.path.join(app.config['UPLOAD_FOLDER'], '%(id)s.%(ext)s'),
+    }
+
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        try:
+            info_dict = ydl.extract_info(url, download=True)
+            return ydl.prepare_filename(info_dict)
+        except Exception as e:
+            # Deliberately best-effort: any failure is reported to the caller
+            # as None, but it is logged so it does not disappear silently.
+            app.logger.error("Error downloading audio: %s", e)
+            return None
+
+@app.route('/')
+def index():
+    """Render the main page.
+
+    The template receives: the ``.mp3`` files found in ``downloads_path``, the
+    ``transcription`` query parameter rendered from Markdown to HTML, the
+    ``download_url`` query parameter unchanged, and the session's
+    ``youtube_transcript`` rendered from Markdown to HTML.
+    """
+    import html
+
+    files = [name for name in os.listdir(downloads_path) if name.endswith('.mp3')]
+
+    # The query string is attacker-controllable and Markdown passes raw HTML
+    # through, so neutralize markup before converting.
+    # NOTE(review): this assumes the template outputs the value unescaped; it
+    # does not cover Markdown links with unsafe URL schemes - confirm whether
+    # a full HTML sanitizer is needed.
+    raw_transcription = request.args.get('transcription', '')
+    transcription = markdown.markdown(html.escape(raw_transcription, quote=False))
+
+    download_url = request.args.get('download_url', '')
+    youtube_transcript = markdown.markdown(session.get('youtube_transcript', ''))
+    return render_template(
+        'index.html',
+        transcription=transcription,
+        download_url=download_url,
+        files=files,
+        youtube_transcript=youtube_transcript,
+    )
+
+
+@app.route('/upload', methods=['POST'])
+def upload_file():
+    """Save the uploaded audio file, transcribe it and show the result.
+
+    Expects a multipart form field named ``file``. The file is stored in the
+    configured upload folder under a sanitized name, transcribed, and the
+    client is redirected to the index page with the transcription in the
+    query string. When no usable file was sent the client is redirected to
+    the index page without a transcription.
+    """
+    uploaded = request.files.get('file')
+    # This route only accepts POST, so redirecting back to request.url would
+    # make the browser issue a GET that cannot be served; go to the index.
+    if uploaded is None or uploaded.filename == '':
+        return redirect(url_for('index'))
+
+    filename = secure_filename(uploaded.filename)
+    if not filename:
+        # secure_filename can strip a name down to nothing; saving would then
+        # target the upload directory itself.
+        return redirect(url_for('index'))
+
+    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
+    uploaded.save(file_path)
+    transcription = transcribe_audio(file_path)
+    # Redirecting to the index route with the transcription result
+    return redirect(url_for('index', transcription=transcription))
+
+
+def _chat_completion(prompt, action):
+    """Send *prompt* as one user message to the OpenAI chat completions API.
+
+    Args:
+        prompt: Full text of the user message.
+        action: Verb inserted in the failure message ("beautify", "summarize").
+
+    Returns:
+        The content of the first choice on HTTP 200; otherwise a
+        "Failed to <action> text. ..." message.
+    """
+    api_url = "https://api.openai.com/v1/chat/completions"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {OPENAI_API_KEY}"
+    }
+    payload = {
+        "model": "gpt-4-turbo-preview",
+        "messages": [
+            {
+                "role": "system",
+                "content": "You are a helpful assistant."
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ]
+    }
+
+    try:
+        # (connect, read) timeouts so a stalled connection cannot block the
+        # request handler forever; the read budget is generous because long
+        # texts take a while to generate.
+        response = requests.post(api_url, json=payload, headers=headers, timeout=(10, 300))
+    except requests.RequestException as exc:
+        return f"Failed to {action} text. Error: {exc}"
+
+    if response.status_code != 200:
+        return f"Failed to {action} text. Status code: {response.status_code}"
+    # Extract the generated text from the first choice of the response.
+    return response.json()['choices'][0]['message']['content']
+
+
+def beautify_text(text):
+    """Ask the chat model to make *text* more readable without shortening it.
+
+    Returns the reformatted text, or a "Failed to beautify text. ..." message
+    when the API call does not succeed.
+    """
+    prompt = f"Rend ce texte plus aéré et lisible, mais ne le résume pas et ne le tronque pas: {text}"
+    return _chat_completion(prompt, "beautify")
+
+
+def summarize(text):
+    """Ask the chat model to summarize *text*, translating it to French if needed.
+
+    Returns the summary, or a "Failed to summarize text. ..." message when the
+    API call does not succeed.
+    """
+    prompt = f"Résume ce texte, et s'il n'est pas en français, traduis-le: {text}"
+    return _chat_completion(prompt, "summarize")
+
+def transcribe_audio(file_path, language="fr"):
+    """Transcribe an audio file with the OpenAI ``whisper-1`` model.
+
+    Args:
+        file_path: Path of the audio file to upload.
+        language: Language hint sent to the API; defaults to ``"fr"``.
+            Pass ``None`` or an empty string to send no hint.
+
+    Returns:
+        The transcription text on HTTP 200. On any failure a descriptive
+        error message is returned instead of raising.
+    """
+    headers = {
+        'Authorization': f'Bearer {OPENAI_API_KEY}'
+    }
+    model_parameters = {"model": "whisper-1"}
+    if language:
+        model_parameters["language"] = language
+
+    try:
+        # The context manager closes the file handle even when the request
+        # fails; a bare open() here would leak one descriptor per call.
+        with open(file_path, 'rb') as audio_file:
+            response = requests.post(
+                'https://api.openai.com/v1/audio/transcriptions',
+                headers=headers,
+                files={'file': audio_file},
+                data=model_parameters,
+                # (connect, read) timeouts so a stalled upload cannot hang
+                # the request handler indefinitely.
+                timeout=(10, 600),
+            )
+
+        if response.status_code == 200:
+            response_json = response.json()
+            return response_json.get('text', 'Transcription not found or not available.')
+
+        error_message = f"Failed to transcribe audio. Status code: {response.status_code}."
+        try:
+            response_json = response.json()
+            if 'error' in response_json:
+                error_detail = response_json['error']['message']
+                error_message += f" Error detail: {error_detail}"
+        except ValueError:
+            # The error body was not JSON; include the start of the raw text.
+            error_message += f" Response: {response.text[:200]}..."
+        return error_message
+    except Exception as e:
+        # Catch any other exceptions, such as network issues or file errors
+        return f"An exception occurred: {str(e)}"
+
+def sanitize_filename(text):
+    """
+    Turn free text into a string that is safe to embed in a filename.
+
+    Each run of word characters (letters, digits, underscore) is kept and the
+    runs are joined with single underscores; everything else is dropped.
+    """
+    word_runs = re.findall(r'\w+', text)
+    return '_'.join(word_runs)
+
+@app.route('/synthesize', methods=['POST'])
+def synthesize():
+    """Convert the posted text to speech and redirect to the index page.
+
+    Reads ``text-to-synthesize`` and the optional ``voice`` field from the
+    form. On success the client is redirected to the index page with the
+    generated file name in ``download_url``; on failure a plain-text error
+    is returned.
+    """
+    text = request.form['text-to-synthesize']
+    selected_voice = request.form.get('voice', 'nova')  # Default to 'nova' if not specified
+    file_path = synthesize_speech(text, selected_voice)
+    if not file_path:
+        # A view must always return a response; falling through here would
+        # return None and surface as an internal server error.
+        return "Failed to synthesize speech from the provided text.", 502
+    filename = os.path.basename(file_path)
+    return redirect(url_for('index', download_url=filename))
+
+def synthesize_speech(text, voice="nova"):
+    """Generate an MP3 of *text* with the OpenAI ``tts-1`` model.
+
+    The file name is built from the first five words of the text plus a
+    timestamp, and the file is written into ``downloads_path``.
+
+    Args:
+        text: Text to read aloud.
+        voice: Voice name forwarded to the API; defaults to ``"nova"``.
+
+    Returns:
+        The path of the written file, or ``None`` when the API call fails.
+    """
+    first_words = ' '.join(text.split()[:5])
+    # Cap the stem so five very long "words" cannot produce a file name the
+    # filesystem rejects.
+    sanitized_text = sanitize_filename(first_words)[:40]
+    current_datetime = datetime.now().strftime('%Y%m%d-%H%M%S')
+
+    filename = f"{sanitized_text}_{current_datetime}.mp3"
+    # Write to the same directory the index page lists, independent of the
+    # current working directory.
+    file_path = os.path.join(downloads_path, filename)
+
+    api_url = "https://api.openai.com/v1/audio/speech"
+    headers = {
+        "Authorization": f"Bearer {OPENAI_API_KEY}",
+        "Content-Type": "application/json"
+    }
+
+    data = {
+        "model": "tts-1",
+        "input": text,
+        "voice": voice,
+        "response_format": "mp3",
+        "speed": 1
+    }
+    try:
+        # (connect, read) timeouts so a stalled connection cannot block the
+        # request handler forever.
+        response = requests.post(api_url, json=data, headers=headers, timeout=(10, 300))
+    except requests.RequestException:
+        return None
+
+    if response.status_code != 200:
+        return None
+
+    with open(file_path, 'wb') as audio_file:
+        audio_file.write(response.content)
+    return file_path
+
+if __name__ == '__main__':
+    # The interactive debugger lets anyone who can reach the server execute
+    # code, so debug mode is opt-in through the FLASK_DEBUG variable instead
+    # of being always on.
+    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
+    app.run(debug=debug_enabled)