File size: 5,692 Bytes
c50d138
 
 
 
 
 
 
 
 
 
f384976
c50d138
 
 
f384976
c5a5632
c50d138
 
f384976
 
 
c50d138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f384976
 
 
 
c5a5632
f384976
c5a5632
f384976
 
 
 
c50d138
f384976
c50d138
 
 
 
 
c5a5632
c50d138
 
 
 
 
 
 
c5a5632
c50d138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0a2931
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import streamlit as st
from crewai import Agent, Task, Crew
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
import requests
import os
import speech_recognition as sr
from pydub import AudioSegment
import tempfile
import litellm

# Configuration
NASA_API_URL = "https://api.nasa.gov/planetary/apod?api_key=DEMO_KEY"  # APOD endpoint; DEMO_KEY is rate-limited
HF_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"  # embedding model for FAISS
LLM_MODEL = "huggingface/HuggingFaceH4/zephyr-7b-beta"  # litellm model string
HUGGINGFACE_API_TOKEN = os.getenv("HUGGINGFACE_API_KEY")  # may be None if unset

# Set Hugging Face API token for litellm.
# Guard the assignment: os.environ values must be str, so writing None
# (env var missing) would raise TypeError at import time and crash the app.
if HUGGINGFACE_API_TOKEN:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACE_API_TOKEN

# Language Configuration: UI label -> BCP-47 locale code used by
# Google speech recognition; the base code (before '-') is handed to the agents.
LANGUAGE_CODES = {
    'English': 'en-US',
    'Spanish': 'es-ES',
    'French': 'fr-FR',
    'German': 'de-DE',
    'Chinese': 'zh-CN',
    'Arabic': 'ar-SA'
}

def speech_to_text(audio_file, language_code):
    """Transcribe an uploaded audio file to text via Google speech recognition.

    Args:
        audio_file: Streamlit UploadedFile (wav/mp3/ogg per the uploader widget).
        language_code: BCP-47 locale code, e.g. 'en-US'.

    Returns:
        The transcribed text, or "" on any failure (the error is shown
        to the user via st.error).
    """
    recognizer = sr.Recognizer()
    tmp_path = None
    wav_path = None
    try:
        # Keep the upload's real extension so pydub can detect the format.
        # The original code always wrote a ".wav" suffix, which made the
        # conversion branch below unreachable and broke mp3/ogg uploads.
        suffix = os.path.splitext(getattr(audio_file, "name", ""))[1] or ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file.write(audio_file.getvalue())
            tmp_path = tmp_file.name

        # Convert to WAV if necessary (speech_recognition only reads WAV/AIFF/FLAC)
        audio_path = tmp_path
        if not audio_path.endswith('.wav'):
            audio = AudioSegment.from_file(audio_path)
            wav_path = audio_path + ".wav"
            audio.export(wav_path, format="wav")
            audio_path = wav_path

        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
            return recognizer.recognize_google(audio_data, language=language_code)

    except Exception as e:
        st.error(f"Audio processing error: {str(e)}")
        return ""
    finally:
        # Remove BOTH temp files. Originally the converted WAV leaked, and
        # referencing an unbound path after an early write failure raised
        # NameError, masking the real error.
        for path in (tmp_path, wav_path):
            if path and os.path.exists(path):
                os.remove(path)

def get_nasa_data():
    """Fetch NASA's Astronomy Picture of the Day metadata.

    Returns:
        The parsed JSON dict on success, or {"error": "..."} on any
        network/HTTP/parse failure (callers use .get(), so the error
        dict degrades gracefully).
    """
    try:
        response = requests.get(NASA_API_URL, timeout=10)
        # Surface HTTP 4xx/5xx as an error dict instead of attempting to
        # parse an error page as JSON (the original skipped this check).
        response.raise_for_status()
        return response.json()
    except Exception as e:
        return {"error": f"NASA API Error: {str(e)}"}

def load_knowledge_base():
    """Build a FAISS vector store from the first few NASA Mars news pages.

    Returns:
        A FAISS store of up to 3 documents embedded with HF_MODEL_NAME,
        or None if loading/embedding fails (best-effort: callers must
        handle None).
    """
    try:
        loader = WebBaseLoader(["https://mars.nasa.gov/news/"])
        docs = loader.load()[:3]  # cap to keep embedding cheap
        embeddings = HuggingFaceEmbeddings(model_name=HF_MODEL_NAME)
        return FAISS.from_documents(docs, embeddings)
    except Exception:
        # Deliberate best-effort: a missing knowledge base should not
        # break question answering. (Unused exception variable removed.)
        return None

def call_llm(prompt):
    """Send a single-turn user prompt to the configured Hugging Face model.

    Returns the model's reply text, or an "LLM Error: ..." string if the
    call or response extraction fails.
    """
    messages = [{"role": "user", "content": prompt}]
    try:
        result = litellm.completion(
            model=LLM_MODEL,
            messages=messages,
            api_key=HUGGINGFACE_API_TOKEN,
        )
        reply = result["choices"][0]["message"]["content"]
        return reply
    except Exception as e:
        return f"LLM Error: {str(e)}"

def setup_agents(language='en'):
    """Create the two CrewAI agents used by the pipeline.

    Args:
        language: base language code (e.g. 'en', 'es') that shapes the
            educator's goal/backstory; the researcher is language-agnostic.

    Returns:
        (researcher, educator) Agent tuple.
    """
    # Use the shared LLM_MODEL constant rather than duplicating the model
    # string literal in each agent (the file already defines it above).
    researcher = Agent(
        role="Multilingual Space Analyst",
        goal="Analyze and validate space information",
        backstory="Expert in multilingual space data analysis with NASA mission experience.",
        verbose=True,
        llm=LLM_MODEL
    )

    educator = Agent(
        role="Bilingual Science Educator",
        goal=f"Explain complex concepts in {language} using simple terms",
        backstory=f"Multilingual science communicator specializing in {language} explanations.",
        verbose=True,
        llm=LLM_MODEL
    )

    return researcher, educator

def process_question(question, target_lang='en'):
    """Run the two-agent research/explain pipeline on a user question.

    Args:
        question: the user's question text.
        target_lang: base language code for the final explanation.

    Returns:
        The crew's final output, or an "Error: ..." string on failure.
    """
    try:
        nasa_data = get_nasa_data()
        # NOTE(review): the original also called load_knowledge_base() here,
        # an expensive web-fetch + embedding build whose result was never
        # used. Removed as dead work; re-add when the store is wired into
        # a task as retrieval context.
        researcher, educator = setup_agents(target_lang)

        research_task = Task(
            description=f"""Research: {question}
            NASA Context: {nasa_data.get('explanation', '')}
            Language: {target_lang}""",
            agent=researcher,
            expected_output="3 verified technical points",
            output_file="research.md"
        )

        explain_task = Task(
            description=f"Explain in {target_lang} using simple terms and analogies",
            agent=educator,
            expected_output="2-paragraph answer in requested language",
            context=[research_task]  # educator consumes the researcher's output
        )

        crew = Crew(
            agents=[researcher, educator],
            tasks=[research_task, explain_task],
            verbose=True
        )

        return crew.kickoff()
    except Exception as e:
        return f"Error: {str(e)}"

# Streamlit Interface
# Top-level script: Streamlit re-runs this on every widget interaction,
# so widget call order below defines the UI layout.
st.title("πŸš€ Multilingual Space Agent")
st.markdown("### Ask space questions in any language!")

# Single language selection for both input and output
selected_lang = st.selectbox("Select Language", list(LANGUAGE_CODES.keys()))
lang_code = LANGUAGE_CODES[selected_lang].split('-')[0]  # Extract base language code

# Input Method
input_method = st.radio("Input Method", ["Text", "Audio File"])

question = ""
if input_method == "Text":
    question = st.text_input(f"Your space question in {selected_lang}:", "")
else:
    audio_file = st.file_uploader("Upload audio file", type=["wav", "mp3", "ogg"])

    if audio_file is not None:
        with st.spinner("Processing audio..."):
            # Full locale code (e.g. 'en-US') for speech recognition;
            # transcription returns "" on failure, which skips the agent run.
            question = speech_to_text(audio_file, LANGUAGE_CODES[selected_lang])
            if question:
                st.text_area("Transcribed Text", value=question, height=100)

# Empty question (no input yet, or failed transcription) renders nothing.
if question:
    with st.spinner("Analyzing with AI agents..."):
        # Base code (e.g. 'en') steers the agents' output language.
        answer = process_question(question, lang_code)
        st.markdown(f"### 🌍 Answer ({selected_lang}):")
        st.markdown(answer)

st.markdown("---")
st.markdown("*Powered by NASA API & Open Source AI*")